// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1993 Linus Torvalds
 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 * Numa awareness, Christoph Lameter, SGI, June 2005
 * Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
 */

#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/set_memory.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/llist.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>

#include <linux/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

#include "internal.h"
#include "pgalloc-track.h"

bool is_vmalloc_addr(const void *x)
{
	unsigned long addr = (unsigned long)x;

	return addr >= VMALLOC_START && addr < VMALLOC_END;
}
EXPORT_SYMBOL(is_vmalloc_addr);

struct vfree_deferred {
	struct llist_head list;
	struct work_struct wq;
};
static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);

static void __vunmap(const void *, int);

static void free_work(struct work_struct *w)
{
	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
	struct llist_node *t, *llnode;

	llist_for_each_safe(llnode, t, llist_del_all(&p->list))
		__vunmap((void *)llnode, 1);
}
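/*
 * Illustrative sketch of the producer side (an assumption modelled on
 * vfree_atomic(), not quoted from this section): contexts that must not
 * sleep chain the to-be-freed address onto the per-CPU llist above and
 * kick the work item, which later runs free_work() in process context:
 *
 *	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
 *
 *	if (llist_add((struct llist_node *)addr, &p->list))
 *		schedule_work(&p->wq);
 */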

/*** Page table manipulation functions ***/

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			     pgtbl_mod_mask *mask)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
	*mask |= PGTBL_PTE_MODIFIED;
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			     pgtbl_mod_mask *mask)
{
	pmd_t *pmd;
	unsigned long next;
	int cleared;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);

		cleared = pmd_clear_huge(pmd);
		if (cleared || pmd_bad(*pmd))
			*mask |= PGTBL_PMD_MODIFIED;

		if (cleared)
			continue;
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next, mask);

		cond_resched();
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			     pgtbl_mod_mask *mask)
{
	pud_t *pud;
	unsigned long next;
	int cleared;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);

		cleared = pud_clear_huge(pud);
		if (cleared || pud_bad(*pud))
			*mask |= PGTBL_PUD_MODIFIED;

		if (cleared)
			continue;
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next, mask);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			     pgtbl_mod_mask *mask)
{
	p4d_t *p4d;
	unsigned long next;
	int cleared;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		cleared = p4d_clear_huge(p4d);
		if (cleared || p4d_bad(*p4d))
			*mask |= PGTBL_P4D_MODIFIED;

		if (cleared)
			continue;
		if (p4d_none_or_clear_bad(p4d))
			continue;
		vunmap_pud_range(p4d, addr, next, mask);
	} while (p4d++, addr = next, addr != end);
}

/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @start: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @start. The VM area that @start and @size
 * specify should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing. The caller is responsible
 * for calling flush_cache_vunmap() on to-be-unmapped areas before calling
 * this function and flush_tlb_kernel_range() after.
 */
void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
{
	unsigned long end = start + size;
	unsigned long next;
	pgd_t *pgd;
	unsigned long addr = start;
	pgtbl_mod_mask mask = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_bad(*pgd))
			mask |= PGTBL_PGD_MODIFIED;
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_p4d_range(pgd, addr, next, &mask);
	} while (pgd++, addr = next, addr != end);

	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);
}
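/*
 * Illustrative sketch of the flush pairing the NOTE above asks for
 * (a hypothetical caller, not a quote of kernel code):
 *
 *	flush_cache_vunmap(addr, addr + size);
 *	unmap_kernel_range_noflush(addr, size);
 *	flush_tlb_kernel_range(addr, addr + size);
 */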

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
		pgtbl_mod_mask *mask)
{
	pte_t *pte;

	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */

	pte = pte_alloc_kernel_track(pmd, addr, mask);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	*mask |= PGTBL_PTE_MODIFIED;
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
		pgtbl_mod_mask *mask)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
		pgtbl_mod_mask *mask)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
		pgtbl_mod_mask *mask)
{
	p4d_t *p4d;
	unsigned long next;

	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
	if (!p4d)
		return -ENOMEM;
	do {
		next = p4d_addr_end(addr, end);
		if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
			return -ENOMEM;
	} while (p4d++, addr = next, addr != end);
	return 0;
}

/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr. The VM area that @addr and @size specify
 * should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing. The caller is responsible for
 * calling flush_cache_vmap() on to-be-mapped areas before calling this
 * function.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
			     pgprot_t prot, struct page **pages)
{
	unsigned long start = addr;
	unsigned long end = addr + size;
	unsigned long next;
	pgd_t *pgd;
	int err = 0;
	int nr = 0;
	pgtbl_mod_mask mask = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_bad(*pgd))
			mask |= PGTBL_PGD_MODIFIED;
		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);

	return 0;
}

int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
		struct page **pages)
{
	int ret;

	ret = map_kernel_range_noflush(start, size, prot, pages);
	flush_cache_vmap(start, start + size);
	return ret;
}
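/*
 * Illustrative sketch (a hypothetical caller; not a quote of vmap()):
 * the usual pairing is to reserve kernel virtual address space first
 * and then map an array of pages into it:
 *
 *	struct vm_struct *area = get_vm_area(size, VM_MAP);
 *
 *	if (!area)
 *		return NULL;
 *	if (map_kernel_range((unsigned long)area->addr, size,
 *			     PAGE_KERNEL, pages) < 0) {
 *		vunmap(area->addr);
 *		return NULL;
 *	}
 *	return area->addr;
 */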

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put modules in the vmalloc space.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Walk a vmap address to the struct page it maps.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);

	/*
	 * Don't dereference bad PUD or PMD (below) entries. This will also
	 * identify huge mappings, which we may encounter on architectures
	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
	 * not [unambiguously] associated with a struct page, so there is
	 * no correct value to return for them.
	 */
	WARN_ON_ONCE(pud_bad(*pud));
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	WARN_ON_ONCE(pmd_bad(*pmd));
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;

	ptep = pte_offset_map(pmd, addr);
	pte = *ptep;
	if (pte_present(pte))
		page = pte_page(pte);
	pte_unmap(ptep);
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);
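/*
 * Illustrative sketch (hypothetical helper code): collecting the backing
 * pages of a vmalloc()ed buffer one PAGE_SIZE step at a time, the way
 * users of this interface typically drive it:
 *
 *	unsigned long addr = (unsigned long)buf;
 *	unsigned int i;
 *
 *	for (i = 0; i < nr_pages; i++, addr += PAGE_SIZE)
 *		pages[i] = vmalloc_to_page((void *)addr);
 */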

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);


/*** Global kva allocator ***/

#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0


static DEFINE_SPINLOCK(vmap_area_lock);
static DEFINE_SPINLOCK(free_vmap_area_lock);
/* Export for kexec only */
LIST_HEAD(vmap_area_list);
static LLIST_HEAD(vmap_purge_list);
static struct rb_root vmap_area_root = RB_ROOT;
static bool vmap_initialized __read_mostly;

/*
 * This kmem_cache is used for vmap_area objects. Instead of
 * allocating from slab we reuse an object from this cache to
 * make things faster, especially for the "no edge" splitting
 * of a free block.
 */
static struct kmem_cache *vmap_area_cachep;

/*
 * This linked list is used in pair with free_vmap_area_root.
 * It gives O(1) access to prev/next to perform fast coalescing.
 */
static LIST_HEAD(free_vmap_area_list);

/*
 * This augmented red-black tree represents the free vmap space.
 * All vmap_area objects in this tree are sorted by va->va_start
 * address. It is used for allocation and merging when a vmap
 * object is released.
 *
 * Each vmap_area node contains the maximum available free block
 * of its sub-tree, right or left. Therefore it is possible to
 * find the lowest match of a free area.
 */
static struct rb_root free_vmap_area_root = RB_ROOT;

/*
 * Preload a CPU with one object for the "no edge" split case. The
 * aim is to get rid of allocations from the atomic context, thus
 * to use more permissive allocation masks.
 */
static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);

static __always_inline unsigned long
va_size(struct vmap_area *va)
{
	return (va->va_end - va->va_start);
}

static __always_inline unsigned long
get_subtree_max_size(struct rb_node *node)
{
	struct vmap_area *va;

	va = rb_entry_safe(node, struct vmap_area, rb_node);
	return va ? va->subtree_max_size : 0;
}

/*
 * Gets called when a node is removed or the tree is rotated.
 */
static __always_inline unsigned long
compute_subtree_max_size(struct vmap_area *va)
{
	return max3(va_size(va),
		get_subtree_max_size(va->rb_node.rb_left),
		get_subtree_max_size(va->rb_node.rb_right));
}

RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)

static void purge_vmap_area_lazy(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static unsigned long lazy_max_pages(void);

static atomic_long_t nr_vmalloc_pages;

unsigned long vmalloc_nr_pages(void)
{
	return atomic_long_read(&nr_vmalloc_pages);
}

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr >= va->va_end)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

/*
 * This function returns the address of the parent node and of its
 * left or right link for further processing.
 *
 * Otherwise NULL is returned. In that case all further steps
 * regarding insertion of a conflicting overlapping range have
 * to be declined and it is actually considered a bug.
 */
static __always_inline struct rb_node **
find_va_links(struct vmap_area *va,
	struct rb_root *root, struct rb_node *from,
	struct rb_node **parent)
{
	struct vmap_area *tmp_va;
	struct rb_node **link;

	if (root) {
		link = &root->rb_node;
		if (unlikely(!*link)) {
			*parent = NULL;
			return link;
		}
	} else {
		link = &from;
	}

	/*
	 * Go to the bottom of the tree. When we hit the last point
	 * we end up with the parent rb_node and the correct direction,
	 * named link here, where the new va->rb_node will be attached.
	 */
	do {
		tmp_va = rb_entry(*link, struct vmap_area, rb_node);

		/*
		 * During the traversal we also do some sanity checks.
		 * Trigger the WARN() if there are side (left/right)
		 * or full overlaps.
		 */
		if (va->va_start < tmp_va->va_end &&
				va->va_end <= tmp_va->va_start)
			link = &(*link)->rb_left;
		else if (va->va_end > tmp_va->va_start &&
				va->va_start >= tmp_va->va_end)
			link = &(*link)->rb_right;
		else {
			WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
				va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);

			return NULL;
		}
	} while (*link);

	*parent = &tmp_va->rb_node;
	return link;
}

static __always_inline struct list_head *
get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
{
	struct list_head *list;

	if (unlikely(!parent))
		/*
		 * The red-black tree where we try to find VA neighbors
		 * before merging or inserting is empty, i.e. it means
		 * there is no free vmap space. Normally it does not
		 * happen but we handle this case anyway.
		 */
		return NULL;

	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
	return (&parent->rb_right == link ?
		list->next : list);
}

static __always_inline void
link_va(struct vmap_area *va, struct rb_root *root,
	struct rb_node *parent, struct rb_node **link, struct list_head *head)
{
	/*
	 * VA is still not in the list, but we can
	 * identify its future previous list_head node.
	 */
	if (likely(parent)) {
		head = &rb_entry(parent, struct vmap_area, rb_node)->list;
		if (&parent->rb_right != link)
			head = head->prev;
	}

	/* Insert to the rb-tree */
	rb_link_node(&va->rb_node, parent, link);
	if (root == &free_vmap_area_root) {
		/*
		 * Some explanation here. Just perform simple insertion
		 * to the tree. We do not set va->subtree_max_size to
		 * its current size before calling rb_insert_augmented().
		 * That is because we populate the tree from the bottom
		 * up to parent levels only when the node _is_ in the tree.
		 *
		 * Therefore we set subtree_max_size to zero after insertion,
		 * to let __augment_tree_propagate_from() put everything in
		 * the correct order later on.
		 */
		rb_insert_augmented(&va->rb_node,
			root, &free_vmap_area_rb_augment_cb);
		va->subtree_max_size = 0;
	} else {
		rb_insert_color(&va->rb_node, root);
	}

	/* Address-sort this list */
	list_add(&va->list, head);
}

static __always_inline void
unlink_va(struct vmap_area *va, struct rb_root *root)
{
	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
		return;

	if (root == &free_vmap_area_root)
		rb_erase_augmented(&va->rb_node,
			root, &free_vmap_area_rb_augment_cb);
	else
		rb_erase(&va->rb_node, root);

	list_del(&va->list);
	RB_CLEAR_NODE(&va->rb_node);
}

#if DEBUG_AUGMENT_PROPAGATE_CHECK
static void
augment_tree_propagate_check(void)
{
	struct vmap_area *va;
	unsigned long computed_size;

	list_for_each_entry(va, &free_vmap_area_list, list) {
		computed_size = compute_subtree_max_size(va);
		if (computed_size != va->subtree_max_size)
			pr_emerg("tree is corrupted: %lu, %lu\n",
				va_size(va), va->subtree_max_size);
	}
}
#endif

/*
 * This function populates subtree_max_size from bottom to upper
 * levels starting from the VA point. The propagation must be done
 * when VA size is modified by changing its va_start/va_end, or
 * when a new VA is inserted into the tree.
 *
 * It means that __augment_tree_propagate_from() must be called:
 * - After VA has been inserted to the tree(free path);
 * - After VA has been shrunk(allocation path);
 * - After VA has been increased(merging path).
 *
 * Please note that, it does not mean that upper parent nodes
 * and their subtree_max_size are recalculated all the time up
 * to the root node.
 *
 *       4--8
 *        /\
 *       /  \
 *      /    \
 *    2--2  8--8
 *
 * For example if we modify the node 4, shrinking it to 2, then
 * no modification is required. If we shrink the node 2 to 1,
 * only its subtree_max_size is updated and set to 1. If we shrink
 * the node 8 to 6, then its subtree_max_size is set to 6 and the
 * parent node becomes 4--6.
 */
static __always_inline void
augment_tree_propagate_from(struct vmap_area *va)
{
	/*
	 * Populate the tree from bottom towards the root until
	 * the calculated maximum available size of checked node
	 * is equal to its current one.
	 */
	free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);

#if DEBUG_AUGMENT_PROPAGATE_CHECK
	augment_tree_propagate_check();
#endif
}

static void
insert_vmap_area(struct vmap_area *va,
	struct rb_root *root, struct list_head *head)
{
	struct rb_node **link;
	struct rb_node *parent;

	link = find_va_links(va, root, NULL, &parent);
	if (link)
		link_va(va, root, parent, link, head);
}

static void
insert_vmap_area_augment(struct vmap_area *va,
	struct rb_node *from, struct rb_root *root,
	struct list_head *head)
{
	struct rb_node **link;
	struct rb_node *parent;

	if (from)
		link = find_va_links(va, NULL, from, &parent);
	else
		link = find_va_links(va, root, NULL, &parent);

	if (link) {
		link_va(va, root, parent, link, head);
		augment_tree_propagate_from(va);
	}
}

/*
 * Merge a de-allocated chunk of VA memory with previous
 * and next free blocks. If coalescing is not done, a new
 * free area is inserted. If VA has been merged, it is
 * freed.
 *
 * Please note, it can return NULL in case of overlapping
 * ranges, followed by a WARN() report. Despite being buggy
 * behaviour, the system can stay alive and keep going.
 */
static __always_inline struct vmap_area *
merge_or_add_vmap_area(struct vmap_area *va,
	struct rb_root *root, struct list_head *head)
{
	struct vmap_area *sibling;
	struct list_head *next;
	struct rb_node **link;
	struct rb_node *parent;
	bool merged = false;

	/*
	 * Find a place in the tree where VA potentially will be
	 * inserted, unless it is merged with its sibling/siblings.
	 */
	link = find_va_links(va, root, NULL, &parent);
	if (!link)
		return NULL;

	/*
	 * Get next node of VA to check if merging can be done.
	 */
	next = get_va_next_sibling(parent, link);
	if (unlikely(next == NULL))
		goto insert;

	/*
	 * start            end
	 * |                |
	 * |<------VA------>|<-----Next----->|
	 *                  |                |
	 *                  start            end
	 */
	if (next != head) {
		sibling = list_entry(next, struct vmap_area, list);
		if (sibling->va_start == va->va_end) {
			sibling->va_start = va->va_start;

			/* Free vmap_area object. */
			kmem_cache_free(vmap_area_cachep, va);

			/* Point to the new merged area. */
			va = sibling;
			merged = true;
		}
	}

	/*
	 * start            end
	 * |                |
	 * |<-----Prev----->|<------VA------>|
	 *                  |                |
	 *                  start            end
	 */
	if (next->prev != head) {
		sibling = list_entry(next->prev, struct vmap_area, list);
		if (sibling->va_end == va->va_start) {
			/*
			 * If both neighbors are coalesced, it is important
			 * to unlink the "next" node first, followed by merging
			 * with "previous" one. Otherwise the tree might not be
			 * fully populated if a sibling's augmented value is
			 * "normalized" because of rotation operations.
			 */
			if (merged)
				unlink_va(va, root);

			sibling->va_end = va->va_end;

			/* Free vmap_area object. */
			kmem_cache_free(vmap_area_cachep, va);

			/* Point to the new merged area. */
			va = sibling;
			merged = true;
		}
	}

insert:
	if (!merged)
		link_va(va, root, parent, link, head);

	/*
	 * Last step is to check and update the tree.
	 */
	augment_tree_propagate_from(va);
	return va;
}
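/*
 * Worked example (illustrative numbers only): with free blocks
 * [0x1000, 0x3000) and [0x4000, 0x6000) already in the tree, freeing
 * VA [0x3000, 0x4000) first merges with the next sibling, giving
 * [0x3000, 0x6000), then with the previous one, leaving a single
 * free block [0x1000, 0x6000) and two released vmap_area objects.
 */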

static __always_inline bool
is_within_this_va(struct vmap_area *va, unsigned long size,
	unsigned long align, unsigned long vstart)
{
	unsigned long nva_start_addr;

	if (va->va_start > vstart)
		nva_start_addr = ALIGN(va->va_start, align);
	else
		nva_start_addr = ALIGN(vstart, align);

	/* Can be overflowed due to big size or alignment. */
	if (nva_start_addr + size < nva_start_addr ||
			nva_start_addr < vstart)
		return false;

	return (nva_start_addr + size <= va->va_end);
}

/*
 * Find the first free block (lowest start address) in the tree
 * that can accomplish the request described by the passed
 * parameters.
 */
static __always_inline struct vmap_area *
find_vmap_lowest_match(unsigned long size,
	unsigned long align, unsigned long vstart)
{
	struct vmap_area *va;
	struct rb_node *node;
	unsigned long length;

	/* Start from the root. */
	node = free_vmap_area_root.rb_node;

	/* Adjust the search size for alignment overhead. */
	length = size + align - 1;

	while (node) {
		va = rb_entry(node, struct vmap_area, rb_node);

		if (get_subtree_max_size(node->rb_left) >= length &&
				vstart < va->va_start) {
			node = node->rb_left;
		} else {
			if (is_within_this_va(va, size, align, vstart))
				return va;

			/*
			 * Does not make sense to go deeper towards the right
			 * sub-tree if it does not have a free block that is
			 * equal to or bigger than the requested search length.
			 */
			if (get_subtree_max_size(node->rb_right) >= length) {
				node = node->rb_right;
				continue;
			}

			/*
			 * OK. We roll back and find the first right sub-tree
			 * that satisfies the search criteria. It can happen
			 * only once due to the "vstart" restriction.
			 */
			while ((node = rb_parent(node))) {
				va = rb_entry(node, struct vmap_area, rb_node);
				if (is_within_this_va(va, size, align, vstart))
					return va;

				if (get_subtree_max_size(node->rb_right) >= length &&
						vstart <= va->va_start) {
					node = node->rb_right;
					break;
				}
			}
		}
	}

	return NULL;
}

#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
#include <linux/random.h>

static struct vmap_area *
find_vmap_lowest_linear_match(unsigned long size,
	unsigned long align, unsigned long vstart)
{
	struct vmap_area *va;

	list_for_each_entry(va, &free_vmap_area_list, list) {
		if (!is_within_this_va(va, size, align, vstart))
			continue;

		return va;
	}

	return NULL;
}

static void
find_vmap_lowest_match_check(unsigned long size)
{
	struct vmap_area *va_1, *va_2;
	unsigned long vstart;
	unsigned int rnd;

	get_random_bytes(&rnd, sizeof(rnd));
	vstart = VMALLOC_START + rnd;

	va_1 = find_vmap_lowest_match(size, 1, vstart);
	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);

	if (va_1 != va_2)
		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
			va_1, va_2, vstart);
}
#endif

enum fit_type {
	NOTHING_FIT = 0,
	FL_FIT_TYPE = 1,	/* full fit */
	LE_FIT_TYPE = 2,	/* left edge fit */
	RE_FIT_TYPE = 3,	/* right edge fit */
	NE_FIT_TYPE = 4		/* no edge fit */
};

static __always_inline enum fit_type
classify_va_fit_type(struct vmap_area *va,
	unsigned long nva_start_addr, unsigned long size)
{
	enum fit_type type;

	/* Check if it is within VA. */
	if (nva_start_addr < va->va_start ||
			nva_start_addr + size > va->va_end)
		return NOTHING_FIT;

	/* Now classify. */
	if (va->va_start == nva_start_addr) {
		if (va->va_end == nva_start_addr + size)
			type = FL_FIT_TYPE;
		else
			type = LE_FIT_TYPE;
	} else if (va->va_end == nva_start_addr + size) {
		type = RE_FIT_TYPE;
	} else {
		type = NE_FIT_TYPE;
	}

	return type;
}

static __always_inline int
adjust_va_to_fit_type(struct vmap_area *va,
	unsigned long nva_start_addr, unsigned long size,
	enum fit_type type)
{
	struct vmap_area *lva = NULL;

	if (type == FL_FIT_TYPE) {
		/*
		 * No need to split VA, it fully fits.
		 *
		 * |               |
		 * V      NVA      V
		 * |---------------|
		 */
		unlink_va(va, &free_vmap_area_root);
		kmem_cache_free(vmap_area_cachep, va);
	} else if (type == LE_FIT_TYPE) {
		/*
		 * Split left edge of fit VA.
		 *
		 * |       |
		 * V  NVA  V   R
		 * |-------|-------|
		 */
		va->va_start += size;
	} else if (type == RE_FIT_TYPE) {
		/*
		 * Split right edge of fit VA.
		 *
		 *         |       |
		 *     L   V  NVA  V
		 * |-------|-------|
		 */
		va->va_end = nva_start_addr;
	} else if (type == NE_FIT_TYPE) {
		/*
		 * Split no edge of fit VA.
		 *
		 *     |       |
		 *   L V  NVA  V R
		 * |---|-------|---|
		 */
		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
		if (unlikely(!lva)) {
			/*
			 * For the percpu allocator we do not do any
			 * pre-allocation and leave it as it is. The reason
			 * is that it most likely never ends up with
			 * NE_FIT_TYPE splitting. In case of percpu
			 * allocations, offsets and sizes are aligned to a
			 * fixed align request, i.e. RE_FIT_TYPE and
			 * FL_FIT_TYPE are its main fitting cases.
			 *
			 * There are a few exceptions though, as an example it
			 * is a first allocation (early boot up) when we have
			 * "one" big free space that has to be split.
			 *
			 * Also we can hit this path in case of regular "vmap"
			 * allocations, if "this" current CPU was not preloaded.
			 * See the comment in alloc_vmap_area() why. If so, then
			 * GFP_NOWAIT is used instead to get an extra object for
			 * split purposes. That is rare and most of the time
			 * does not occur.
			 *
			 * What happens if an allocation fails? Basically, an
			 * "overflow" path is triggered to purge lazily freed
			 * areas to free some memory, then the "retry" path is
			 * triggered to repeat one more time. See more details
			 * in alloc_vmap_area() function.
			 */
			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
			if (!lva)
				return -1;
		}

		/*
		 * Build the remainder.
		 */
		lva->va_start = va->va_start;
		lva->va_end = nva_start_addr;

		/*
		 * Shrink this VA to remaining size.
		 */
		va->va_start = nva_start_addr + size;
	} else {
		return -1;
	}

	if (type != FL_FIT_TYPE) {
		augment_tree_propagate_from(va);

		if (lva)	/* type == NE_FIT_TYPE */
			insert_vmap_area_augment(lva, &va->rb_node,
				&free_vmap_area_root, &free_vmap_area_list);
	}

	return 0;
}
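/*
 * Worked example (illustrative numbers only): for a free block
 * va = [0x1000, 0x5000), classify_va_fit_type() returns FL_FIT_TYPE
 * for nva_start_addr = 0x1000 with size = 0x4000, LE_FIT_TYPE for
 * 0x1000/0x1000, RE_FIT_TYPE for 0x4000/0x1000, and NE_FIT_TYPE for
 * 0x2000/0x1000, which is the only case that needs the extra "lva"
 * object for the left-hand remainder.
 */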

/*
 * Returns the start address of the newly allocated area on success.
 * Otherwise vend is returned, which indicates failure.
 */
static __always_inline unsigned long
__alloc_vmap_area(unsigned long size, unsigned long align,
	unsigned long vstart, unsigned long vend)
{
	unsigned long nva_start_addr;
	struct vmap_area *va;
	enum fit_type type;
	int ret;

	va = find_vmap_lowest_match(size, align, vstart);
	if (unlikely(!va))
		return vend;

	if (va->va_start > vstart)
		nva_start_addr = ALIGN(va->va_start, align);
	else
		nva_start_addr = ALIGN(vstart, align);

	/* Check the "vend" restriction. */
	if (nva_start_addr + size > vend)
		return vend;

	/* Classify what we have found. */
	type = classify_va_fit_type(va, nva_start_addr, size);
	if (WARN_ON_ONCE(type == NOTHING_FIT))
		return vend;

	/* Update the free vmap_area. */
	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
	if (ret)
		return vend;

#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
	find_vmap_lowest_match_check(size);
#endif

	return nva_start_addr;
}

/*
 * Free a region of KVA allocated by alloc_vmap_area().
 */
static void free_vmap_area(struct vmap_area *va)
{
	/*
	 * Remove from the busy tree/list.
	 */
	spin_lock(&vmap_area_lock);
	unlink_va(va, &vmap_area_root);
	spin_unlock(&vmap_area_lock);

	/*
	 * Insert/Merge it back to the free tree/list.
	 */
	spin_lock(&free_vmap_area_lock);
	merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
	spin_unlock(&free_vmap_area_lock);
}

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va, *pva;
	unsigned long addr;
	int purged = 0;
	int ret;

	BUG_ON(!size);
	BUG_ON(offset_in_page(size));
	BUG_ON(!is_power_of_2(align));

	if (unlikely(!vmap_initialized))
		return ERR_PTR(-EBUSY);

	might_sleep();
	gfp_mask = gfp_mask & GFP_RECLAIM_MASK;

	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

	/*
	 * Only scan the relevant parts containing pointers to other objects
	 * to avoid false negatives.
11768c2ecf20Sopenharmony_ci	 */ 11778c2ecf20Sopenharmony_ci	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ciretry: 11808c2ecf20Sopenharmony_ci	/* 11818c2ecf20Sopenharmony_ci	 * Preload this CPU with one extra vmap_area object. It is used 11828c2ecf20Sopenharmony_ci	 * when the fit type of the free area is NE_FIT_TYPE. Note that 11838c2ecf20Sopenharmony_ci	 * this does not guarantee that the allocation occurs on a CPU 11848c2ecf20Sopenharmony_ci	 * that is preloaded; instead we minimize the cases when it does 11858c2ecf20Sopenharmony_ci	 * not. That can happen due to CPU migration, because there is a 11868c2ecf20Sopenharmony_ci	 * race window until the spinlock below is taken. 11878c2ecf20Sopenharmony_ci	 * 11888c2ecf20Sopenharmony_ci	 * The preload is done in non-atomic context, which allows us 11898c2ecf20Sopenharmony_ci	 * to use more permissive allocation masks that are more stable 11908c2ecf20Sopenharmony_ci	 * under low-memory conditions and high memory pressure. In the 11918c2ecf20Sopenharmony_ci	 * rare case that the CPU is not preloaded, GFP_NOWAIT is used. 11928c2ecf20Sopenharmony_ci	 * 11938c2ecf20Sopenharmony_ci	 * Set "pva" to NULL here because of the "retry" path. 11948c2ecf20Sopenharmony_ci	 */ 11958c2ecf20Sopenharmony_ci	pva = NULL; 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_ci	if (!this_cpu_read(ne_fit_preload_node)) 11988c2ecf20Sopenharmony_ci	/* 11998c2ecf20Sopenharmony_ci	 * Even if it fails we do not really care. Just proceed 12008c2ecf20Sopenharmony_ci	 * as is. If needed, the "overflow" path will refill 12018c2ecf20Sopenharmony_ci	 * the cache we allocate from. 12028c2ecf20Sopenharmony_ci	 */ 12038c2ecf20Sopenharmony_ci	pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci	spin_lock(&free_vmap_area_lock); 12068c2ecf20Sopenharmony_ci 12078c2ecf20Sopenharmony_ci	if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) 12088c2ecf20Sopenharmony_ci	kmem_cache_free(vmap_area_cachep, pva); 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci	/* 12118c2ecf20Sopenharmony_ci	 * If an allocation fails, the "vend" address is 12128c2ecf20Sopenharmony_ci	 * returned. Therefore trigger the overflow path. 
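	 *
	 * To summarize the whole scheme (a sketch of the flow above, not
	 * new behaviour): preload a spare vmap_area while sleeping is
	 * still allowed, take free_vmap_area_lock, publish the spare
	 * object via __this_cpu_cmpxchg(), call __alloc_vmap_area(), and
	 * on "vend" jump to the overflow label, which purges lazily
	 * freed areas and retries once.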
12138c2ecf20Sopenharmony_ci */ 12148c2ecf20Sopenharmony_ci addr = __alloc_vmap_area(size, align, vstart, vend); 12158c2ecf20Sopenharmony_ci spin_unlock(&free_vmap_area_lock); 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci if (unlikely(addr == vend)) 12188c2ecf20Sopenharmony_ci goto overflow; 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci va->va_start = addr; 12218c2ecf20Sopenharmony_ci va->va_end = addr + size; 12228c2ecf20Sopenharmony_ci va->vm = NULL; 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci 12258c2ecf20Sopenharmony_ci spin_lock(&vmap_area_lock); 12268c2ecf20Sopenharmony_ci insert_vmap_area(va, &vmap_area_root, &vmap_area_list); 12278c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 12288c2ecf20Sopenharmony_ci 12298c2ecf20Sopenharmony_ci BUG_ON(!IS_ALIGNED(va->va_start, align)); 12308c2ecf20Sopenharmony_ci BUG_ON(va->va_start < vstart); 12318c2ecf20Sopenharmony_ci BUG_ON(va->va_end > vend); 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ci ret = kasan_populate_vmalloc(addr, size); 12348c2ecf20Sopenharmony_ci if (ret) { 12358c2ecf20Sopenharmony_ci free_vmap_area(va); 12368c2ecf20Sopenharmony_ci return ERR_PTR(ret); 12378c2ecf20Sopenharmony_ci } 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci return va; 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_cioverflow: 12428c2ecf20Sopenharmony_ci if (!purged) { 12438c2ecf20Sopenharmony_ci purge_vmap_area_lazy(); 12448c2ecf20Sopenharmony_ci purged = 1; 12458c2ecf20Sopenharmony_ci goto retry; 12468c2ecf20Sopenharmony_ci } 12478c2ecf20Sopenharmony_ci 12488c2ecf20Sopenharmony_ci if (gfpflags_allow_blocking(gfp_mask)) { 12498c2ecf20Sopenharmony_ci unsigned long freed = 0; 12508c2ecf20Sopenharmony_ci blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); 12518c2ecf20Sopenharmony_ci if (freed > 0) { 12528c2ecf20Sopenharmony_ci purged = 0; 12538c2ecf20Sopenharmony_ci goto retry; 12548c2ecf20Sopenharmony_ci } 12558c2ecf20Sopenharmony_ci } 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_ci if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) 12588c2ecf20Sopenharmony_ci pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n", 12598c2ecf20Sopenharmony_ci size); 12608c2ecf20Sopenharmony_ci 12618c2ecf20Sopenharmony_ci kmem_cache_free(vmap_area_cachep, va); 12628c2ecf20Sopenharmony_ci return ERR_PTR(-EBUSY); 12638c2ecf20Sopenharmony_ci} 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ciint register_vmap_purge_notifier(struct notifier_block *nb) 12668c2ecf20Sopenharmony_ci{ 12678c2ecf20Sopenharmony_ci return blocking_notifier_chain_register(&vmap_notify_list, nb); 12688c2ecf20Sopenharmony_ci} 12698c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(register_vmap_purge_notifier); 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_ciint unregister_vmap_purge_notifier(struct notifier_block *nb) 12728c2ecf20Sopenharmony_ci{ 12738c2ecf20Sopenharmony_ci return blocking_notifier_chain_unregister(&vmap_notify_list, nb); 12748c2ecf20Sopenharmony_ci} 12758c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci/* 12788c2ecf20Sopenharmony_ci * lazy_max_pages is the maximum amount of virtual address space we gather up 12798c2ecf20Sopenharmony_ci * before attempting to purge with a TLB flush. 
12808c2ecf20Sopenharmony_ci * 12818c2ecf20Sopenharmony_ci * There is a tradeoff here: a larger number will cover more kernel page tables 12828c2ecf20Sopenharmony_ci * and take slightly longer to purge, but it will linearly reduce the number of 12838c2ecf20Sopenharmony_ci * global TLB flushes that must be performed. It would seem natural to scale 12848c2ecf20Sopenharmony_ci * this number up linearly with the number of CPUs (because vmapping activity 12858c2ecf20Sopenharmony_ci * could also scale linearly with the number of CPUs), however it is likely 12868c2ecf20Sopenharmony_ci * that in practice, workloads might be constrained in other ways that mean 12878c2ecf20Sopenharmony_ci * vmap activity will not scale linearly with CPUs. Also, I want to be 12888c2ecf20Sopenharmony_ci * conservative and not introduce a big latency on huge systems, so go with 12898c2ecf20Sopenharmony_ci * a less aggressive log scale. It will still be an improvement over the old 12908c2ecf20Sopenharmony_ci * code, and it will be simple to change the scale factor if we find that it 12918c2ecf20Sopenharmony_ci * becomes a problem on bigger systems. For example, with 8 online CPUs fls() yields 4, so up to 4 * 32MB of lazily freed KVA is gathered before a purge is attempted. 12928c2ecf20Sopenharmony_ci */ 12938c2ecf20Sopenharmony_cistatic unsigned long lazy_max_pages(void) 12948c2ecf20Sopenharmony_ci{ 12958c2ecf20Sopenharmony_ci	unsigned int log; 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci	log = fls(num_online_cpus()); 12988c2ecf20Sopenharmony_ci 12998c2ecf20Sopenharmony_ci	return log * (32UL * 1024 * 1024 / PAGE_SIZE); 13008c2ecf20Sopenharmony_ci} 13018c2ecf20Sopenharmony_ci 13028c2ecf20Sopenharmony_cistatic atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0); 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci/* 13058c2ecf20Sopenharmony_ci * Serialize vmap purging. There is no actual critical section protected 13068c2ecf20Sopenharmony_ci * by this lock, but we want to avoid concurrent calls for performance 13078c2ecf20Sopenharmony_ci * reasons and to make pcpu_get_vm_areas() more deterministic. 13088c2ecf20Sopenharmony_ci */ 13098c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(vmap_purge_lock); 13108c2ecf20Sopenharmony_ci 13118c2ecf20Sopenharmony_ci/* for per-CPU blocks */ 13128c2ecf20Sopenharmony_cistatic void purge_fragmented_blocks_allcpus(void); 13138c2ecf20Sopenharmony_ci 13148c2ecf20Sopenharmony_ci/* 13158c2ecf20Sopenharmony_ci * Called before a call to iounmap() if the caller wants the vm_area_structs 13168c2ecf20Sopenharmony_ci * immediately freed. 13178c2ecf20Sopenharmony_ci */ 13188c2ecf20Sopenharmony_civoid set_iounmap_nonlazy(void) 13198c2ecf20Sopenharmony_ci{ 13208c2ecf20Sopenharmony_ci	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1); 13218c2ecf20Sopenharmony_ci} 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci/* 13248c2ecf20Sopenharmony_ci * Purges all lazily-freed vmap areas. 
13258c2ecf20Sopenharmony_ci */ 13268c2ecf20Sopenharmony_cistatic bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) 13278c2ecf20Sopenharmony_ci{ 13288c2ecf20Sopenharmony_ci unsigned long resched_threshold; 13298c2ecf20Sopenharmony_ci struct llist_node *valist; 13308c2ecf20Sopenharmony_ci struct vmap_area *va; 13318c2ecf20Sopenharmony_ci struct vmap_area *n_va; 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_ci lockdep_assert_held(&vmap_purge_lock); 13348c2ecf20Sopenharmony_ci 13358c2ecf20Sopenharmony_ci valist = llist_del_all(&vmap_purge_list); 13368c2ecf20Sopenharmony_ci if (unlikely(valist == NULL)) 13378c2ecf20Sopenharmony_ci return false; 13388c2ecf20Sopenharmony_ci 13398c2ecf20Sopenharmony_ci /* 13408c2ecf20Sopenharmony_ci * TODO: to calculate a flush range without looping. 13418c2ecf20Sopenharmony_ci * The list can be up to lazy_max_pages() elements. 13428c2ecf20Sopenharmony_ci */ 13438c2ecf20Sopenharmony_ci llist_for_each_entry(va, valist, purge_list) { 13448c2ecf20Sopenharmony_ci if (va->va_start < start) 13458c2ecf20Sopenharmony_ci start = va->va_start; 13468c2ecf20Sopenharmony_ci if (va->va_end > end) 13478c2ecf20Sopenharmony_ci end = va->va_end; 13488c2ecf20Sopenharmony_ci } 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci flush_tlb_kernel_range(start, end); 13518c2ecf20Sopenharmony_ci resched_threshold = lazy_max_pages() << 1; 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci spin_lock(&free_vmap_area_lock); 13548c2ecf20Sopenharmony_ci llist_for_each_entry_safe(va, n_va, valist, purge_list) { 13558c2ecf20Sopenharmony_ci unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; 13568c2ecf20Sopenharmony_ci unsigned long orig_start = va->va_start; 13578c2ecf20Sopenharmony_ci unsigned long orig_end = va->va_end; 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_ci /* 13608c2ecf20Sopenharmony_ci * Finally insert or merge lazily-freed area. It is 13618c2ecf20Sopenharmony_ci * detached and there is no need to "unlink" it from 13628c2ecf20Sopenharmony_ci * anything. 13638c2ecf20Sopenharmony_ci */ 13648c2ecf20Sopenharmony_ci va = merge_or_add_vmap_area(va, &free_vmap_area_root, 13658c2ecf20Sopenharmony_ci &free_vmap_area_list); 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci if (!va) 13688c2ecf20Sopenharmony_ci continue; 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_ci if (is_vmalloc_or_module_addr((void *)orig_start)) 13718c2ecf20Sopenharmony_ci kasan_release_vmalloc(orig_start, orig_end, 13728c2ecf20Sopenharmony_ci va->va_start, va->va_end); 13738c2ecf20Sopenharmony_ci 13748c2ecf20Sopenharmony_ci atomic_long_sub(nr, &vmap_lazy_nr); 13758c2ecf20Sopenharmony_ci 13768c2ecf20Sopenharmony_ci if (atomic_long_read(&vmap_lazy_nr) < resched_threshold) 13778c2ecf20Sopenharmony_ci cond_resched_lock(&free_vmap_area_lock); 13788c2ecf20Sopenharmony_ci } 13798c2ecf20Sopenharmony_ci spin_unlock(&free_vmap_area_lock); 13808c2ecf20Sopenharmony_ci return true; 13818c2ecf20Sopenharmony_ci} 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci/* 13848c2ecf20Sopenharmony_ci * Kick off a purge of the outstanding lazy areas. Don't bother if somebody 13858c2ecf20Sopenharmony_ci * is already purging. 
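 *
 * Illustrative contrast with purge_vmap_area_lazy() below: this "try"
 * variant only trylocks vmap_purge_lock and silently skips the purge
 * when somebody else holds it, while the blocking variant waits for
 * the mutex and additionally purges the fragmented per-CPU blocks
 * first.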
13868c2ecf20Sopenharmony_ci */ 13878c2ecf20Sopenharmony_cistatic void try_purge_vmap_area_lazy(void) 13888c2ecf20Sopenharmony_ci{ 13898c2ecf20Sopenharmony_ci if (mutex_trylock(&vmap_purge_lock)) { 13908c2ecf20Sopenharmony_ci __purge_vmap_area_lazy(ULONG_MAX, 0); 13918c2ecf20Sopenharmony_ci mutex_unlock(&vmap_purge_lock); 13928c2ecf20Sopenharmony_ci } 13938c2ecf20Sopenharmony_ci} 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci/* 13968c2ecf20Sopenharmony_ci * Kick off a purge of the outstanding lazy areas. 13978c2ecf20Sopenharmony_ci */ 13988c2ecf20Sopenharmony_cistatic void purge_vmap_area_lazy(void) 13998c2ecf20Sopenharmony_ci{ 14008c2ecf20Sopenharmony_ci mutex_lock(&vmap_purge_lock); 14018c2ecf20Sopenharmony_ci purge_fragmented_blocks_allcpus(); 14028c2ecf20Sopenharmony_ci __purge_vmap_area_lazy(ULONG_MAX, 0); 14038c2ecf20Sopenharmony_ci mutex_unlock(&vmap_purge_lock); 14048c2ecf20Sopenharmony_ci} 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci/* 14078c2ecf20Sopenharmony_ci * Free a vmap area, caller ensuring that the area has been unmapped 14088c2ecf20Sopenharmony_ci * and flush_cache_vunmap had been called for the correct range 14098c2ecf20Sopenharmony_ci * previously. 14108c2ecf20Sopenharmony_ci */ 14118c2ecf20Sopenharmony_cistatic void free_vmap_area_noflush(struct vmap_area *va) 14128c2ecf20Sopenharmony_ci{ 14138c2ecf20Sopenharmony_ci unsigned long nr_lazy; 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci spin_lock(&vmap_area_lock); 14168c2ecf20Sopenharmony_ci unlink_va(va, &vmap_area_root); 14178c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> 14208c2ecf20Sopenharmony_ci PAGE_SHIFT, &vmap_lazy_nr); 14218c2ecf20Sopenharmony_ci 14228c2ecf20Sopenharmony_ci /* After this point, we may free va at any time */ 14238c2ecf20Sopenharmony_ci llist_add(&va->purge_list, &vmap_purge_list); 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_ci if (unlikely(nr_lazy > lazy_max_pages())) 14268c2ecf20Sopenharmony_ci try_purge_vmap_area_lazy(); 14278c2ecf20Sopenharmony_ci} 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci/* 14308c2ecf20Sopenharmony_ci * Free and unmap a vmap area 14318c2ecf20Sopenharmony_ci */ 14328c2ecf20Sopenharmony_cistatic void free_unmap_vmap_area(struct vmap_area *va) 14338c2ecf20Sopenharmony_ci{ 14348c2ecf20Sopenharmony_ci flush_cache_vunmap(va->va_start, va->va_end); 14358c2ecf20Sopenharmony_ci unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start); 14368c2ecf20Sopenharmony_ci if (debug_pagealloc_enabled_static()) 14378c2ecf20Sopenharmony_ci flush_tlb_kernel_range(va->va_start, va->va_end); 14388c2ecf20Sopenharmony_ci 14398c2ecf20Sopenharmony_ci free_vmap_area_noflush(va); 14408c2ecf20Sopenharmony_ci} 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_cistatic struct vmap_area *find_vmap_area(unsigned long addr) 14438c2ecf20Sopenharmony_ci{ 14448c2ecf20Sopenharmony_ci struct vmap_area *va; 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci spin_lock(&vmap_area_lock); 14478c2ecf20Sopenharmony_ci va = __find_vmap_area(addr); 14488c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 14498c2ecf20Sopenharmony_ci 14508c2ecf20Sopenharmony_ci return va; 14518c2ecf20Sopenharmony_ci} 14528c2ecf20Sopenharmony_ci 14538c2ecf20Sopenharmony_ci/*** Per cpu kva allocator ***/ 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci/* 14568c2ecf20Sopenharmony_ci * vmap space is limited especially on 32 bit architectures. 
Ensure there is 14578c2ecf20Sopenharmony_ci * room for at least 16 percpu vmap blocks per CPU. 14588c2ecf20Sopenharmony_ci */ 14598c2ecf20Sopenharmony_ci/* 14608c2ecf20Sopenharmony_ci * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able 14618c2ecf20Sopenharmony_ci * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess 14628c2ecf20Sopenharmony_ci * instead (we just need a rough idea) 14638c2ecf20Sopenharmony_ci */ 14648c2ecf20Sopenharmony_ci#if BITS_PER_LONG == 32 14658c2ecf20Sopenharmony_ci#define VMALLOC_SPACE (128UL*1024*1024) 14668c2ecf20Sopenharmony_ci#else 14678c2ecf20Sopenharmony_ci#define VMALLOC_SPACE (128UL*1024*1024*1024) 14688c2ecf20Sopenharmony_ci#endif 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) 14718c2ecf20Sopenharmony_ci#define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ 14728c2ecf20Sopenharmony_ci#define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ 14738c2ecf20Sopenharmony_ci#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) 14748c2ecf20Sopenharmony_ci#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ 14758c2ecf20Sopenharmony_ci#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ 14768c2ecf20Sopenharmony_ci#define VMAP_BBMAP_BITS \ 14778c2ecf20Sopenharmony_ci VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ 14788c2ecf20Sopenharmony_ci VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ 14798c2ecf20Sopenharmony_ci VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16)) 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_ci#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) 14828c2ecf20Sopenharmony_ci 14838c2ecf20Sopenharmony_cistruct vmap_block_queue { 14848c2ecf20Sopenharmony_ci spinlock_t lock; 14858c2ecf20Sopenharmony_ci struct list_head free; 14868c2ecf20Sopenharmony_ci}; 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_cistruct vmap_block { 14898c2ecf20Sopenharmony_ci spinlock_t lock; 14908c2ecf20Sopenharmony_ci struct vmap_area *va; 14918c2ecf20Sopenharmony_ci unsigned long free, dirty; 14928c2ecf20Sopenharmony_ci unsigned long dirty_min, dirty_max; /*< dirty range */ 14938c2ecf20Sopenharmony_ci struct list_head free_list; 14948c2ecf20Sopenharmony_ci struct rcu_head rcu_head; 14958c2ecf20Sopenharmony_ci struct list_head purge; 14968c2ecf20Sopenharmony_ci}; 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ 14998c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); 15008c2ecf20Sopenharmony_ci 15018c2ecf20Sopenharmony_ci/* 15028c2ecf20Sopenharmony_ci * XArray of vmap blocks, indexed by address, to quickly find a vmap block 15038c2ecf20Sopenharmony_ci * in the free path. Could get rid of this if we change the API to return a 15048c2ecf20Sopenharmony_ci * "cookie" from alloc, to be passed to free. But no big deal yet. 15058c2ecf20Sopenharmony_ci */ 15068c2ecf20Sopenharmony_cistatic DEFINE_XARRAY(vmap_blocks); 15078c2ecf20Sopenharmony_ci 15088c2ecf20Sopenharmony_ci/* 15098c2ecf20Sopenharmony_ci * We should probably have a fallback mechanism to allocate virtual memory 15108c2ecf20Sopenharmony_ci * out of partially filled vmap blocks. However vmap block sizing should be 15118c2ecf20Sopenharmony_ci * fairly reasonable according to the vmalloc size, so it shouldn't be a 15128c2ecf20Sopenharmony_ci * big problem. 
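 *
 * Worked sizing example (illustrative, assuming a 64-bit system with
 * 4K pages and NR_CPUS == 64): VMALLOC_PAGES is 128G / 4K == 33554432,
 * so VMALLOC_PAGES / 64 / 16 == 32768, which is clamped to
 * VMAP_BBMAP_BITS_MAX (1024), giving a VMAP_BLOCK_SIZE of 4MB.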
15138c2ecf20Sopenharmony_ci */ 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_cistatic unsigned long addr_to_vb_idx(unsigned long addr) 15168c2ecf20Sopenharmony_ci{ 15178c2ecf20Sopenharmony_ci addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); 15188c2ecf20Sopenharmony_ci addr /= VMAP_BLOCK_SIZE; 15198c2ecf20Sopenharmony_ci return addr; 15208c2ecf20Sopenharmony_ci} 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_cistatic void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off) 15238c2ecf20Sopenharmony_ci{ 15248c2ecf20Sopenharmony_ci unsigned long addr; 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci addr = va_start + (pages_off << PAGE_SHIFT); 15278c2ecf20Sopenharmony_ci BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start)); 15288c2ecf20Sopenharmony_ci return (void *)addr; 15298c2ecf20Sopenharmony_ci} 15308c2ecf20Sopenharmony_ci 15318c2ecf20Sopenharmony_ci/** 15328c2ecf20Sopenharmony_ci * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this 15338c2ecf20Sopenharmony_ci * block. Of course pages number can't exceed VMAP_BBMAP_BITS 15348c2ecf20Sopenharmony_ci * @order: how many 2^order pages should be occupied in newly allocated block 15358c2ecf20Sopenharmony_ci * @gfp_mask: flags for the page level allocator 15368c2ecf20Sopenharmony_ci * 15378c2ecf20Sopenharmony_ci * Return: virtual address in a newly allocated block or ERR_PTR(-errno) 15388c2ecf20Sopenharmony_ci */ 15398c2ecf20Sopenharmony_cistatic void *new_vmap_block(unsigned int order, gfp_t gfp_mask) 15408c2ecf20Sopenharmony_ci{ 15418c2ecf20Sopenharmony_ci struct vmap_block_queue *vbq; 15428c2ecf20Sopenharmony_ci struct vmap_block *vb; 15438c2ecf20Sopenharmony_ci struct vmap_area *va; 15448c2ecf20Sopenharmony_ci unsigned long vb_idx; 15458c2ecf20Sopenharmony_ci int node, err; 15468c2ecf20Sopenharmony_ci void *vaddr; 15478c2ecf20Sopenharmony_ci 15488c2ecf20Sopenharmony_ci node = numa_node_id(); 15498c2ecf20Sopenharmony_ci 15508c2ecf20Sopenharmony_ci vb = kmalloc_node(sizeof(struct vmap_block), 15518c2ecf20Sopenharmony_ci gfp_mask & GFP_RECLAIM_MASK, node); 15528c2ecf20Sopenharmony_ci if (unlikely(!vb)) 15538c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 15548c2ecf20Sopenharmony_ci 15558c2ecf20Sopenharmony_ci va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, 15568c2ecf20Sopenharmony_ci VMALLOC_START, VMALLOC_END, 15578c2ecf20Sopenharmony_ci node, gfp_mask); 15588c2ecf20Sopenharmony_ci if (IS_ERR(va)) { 15598c2ecf20Sopenharmony_ci kfree(vb); 15608c2ecf20Sopenharmony_ci return ERR_CAST(va); 15618c2ecf20Sopenharmony_ci } 15628c2ecf20Sopenharmony_ci 15638c2ecf20Sopenharmony_ci vaddr = vmap_block_vaddr(va->va_start, 0); 15648c2ecf20Sopenharmony_ci spin_lock_init(&vb->lock); 15658c2ecf20Sopenharmony_ci vb->va = va; 15668c2ecf20Sopenharmony_ci /* At least something should be left free */ 15678c2ecf20Sopenharmony_ci BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); 15688c2ecf20Sopenharmony_ci vb->free = VMAP_BBMAP_BITS - (1UL << order); 15698c2ecf20Sopenharmony_ci vb->dirty = 0; 15708c2ecf20Sopenharmony_ci vb->dirty_min = VMAP_BBMAP_BITS; 15718c2ecf20Sopenharmony_ci vb->dirty_max = 0; 15728c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&vb->free_list); 15738c2ecf20Sopenharmony_ci 15748c2ecf20Sopenharmony_ci vb_idx = addr_to_vb_idx(va->va_start); 15758c2ecf20Sopenharmony_ci err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask); 15768c2ecf20Sopenharmony_ci if (err) { 15778c2ecf20Sopenharmony_ci kfree(vb); 15788c2ecf20Sopenharmony_ci free_vmap_area(va); 15798c2ecf20Sopenharmony_ci return ERR_PTR(err); 
15808c2ecf20Sopenharmony_ci } 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci vbq = &get_cpu_var(vmap_block_queue); 15838c2ecf20Sopenharmony_ci spin_lock(&vbq->lock); 15848c2ecf20Sopenharmony_ci list_add_tail_rcu(&vb->free_list, &vbq->free); 15858c2ecf20Sopenharmony_ci spin_unlock(&vbq->lock); 15868c2ecf20Sopenharmony_ci put_cpu_var(vmap_block_queue); 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_ci return vaddr; 15898c2ecf20Sopenharmony_ci} 15908c2ecf20Sopenharmony_ci 15918c2ecf20Sopenharmony_cistatic void free_vmap_block(struct vmap_block *vb) 15928c2ecf20Sopenharmony_ci{ 15938c2ecf20Sopenharmony_ci struct vmap_block *tmp; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start)); 15968c2ecf20Sopenharmony_ci BUG_ON(tmp != vb); 15978c2ecf20Sopenharmony_ci 15988c2ecf20Sopenharmony_ci free_vmap_area_noflush(vb->va); 15998c2ecf20Sopenharmony_ci kfree_rcu(vb, rcu_head); 16008c2ecf20Sopenharmony_ci} 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_cistatic void purge_fragmented_blocks(int cpu) 16038c2ecf20Sopenharmony_ci{ 16048c2ecf20Sopenharmony_ci LIST_HEAD(purge); 16058c2ecf20Sopenharmony_ci struct vmap_block *vb; 16068c2ecf20Sopenharmony_ci struct vmap_block *n_vb; 16078c2ecf20Sopenharmony_ci struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); 16088c2ecf20Sopenharmony_ci 16098c2ecf20Sopenharmony_ci rcu_read_lock(); 16108c2ecf20Sopenharmony_ci list_for_each_entry_rcu(vb, &vbq->free, free_list) { 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) 16138c2ecf20Sopenharmony_ci continue; 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci spin_lock(&vb->lock); 16168c2ecf20Sopenharmony_ci if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { 16178c2ecf20Sopenharmony_ci vb->free = 0; /* prevent further allocs after releasing lock */ 16188c2ecf20Sopenharmony_ci vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ 16198c2ecf20Sopenharmony_ci vb->dirty_min = 0; 16208c2ecf20Sopenharmony_ci vb->dirty_max = VMAP_BBMAP_BITS; 16218c2ecf20Sopenharmony_ci spin_lock(&vbq->lock); 16228c2ecf20Sopenharmony_ci list_del_rcu(&vb->free_list); 16238c2ecf20Sopenharmony_ci spin_unlock(&vbq->lock); 16248c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 16258c2ecf20Sopenharmony_ci list_add_tail(&vb->purge, &purge); 16268c2ecf20Sopenharmony_ci } else 16278c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 16288c2ecf20Sopenharmony_ci } 16298c2ecf20Sopenharmony_ci rcu_read_unlock(); 16308c2ecf20Sopenharmony_ci 16318c2ecf20Sopenharmony_ci list_for_each_entry_safe(vb, n_vb, &purge, purge) { 16328c2ecf20Sopenharmony_ci list_del(&vb->purge); 16338c2ecf20Sopenharmony_ci free_vmap_block(vb); 16348c2ecf20Sopenharmony_ci } 16358c2ecf20Sopenharmony_ci} 16368c2ecf20Sopenharmony_ci 16378c2ecf20Sopenharmony_cistatic void purge_fragmented_blocks_allcpus(void) 16388c2ecf20Sopenharmony_ci{ 16398c2ecf20Sopenharmony_ci int cpu; 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) 16428c2ecf20Sopenharmony_ci purge_fragmented_blocks(cpu); 16438c2ecf20Sopenharmony_ci} 16448c2ecf20Sopenharmony_ci 16458c2ecf20Sopenharmony_cistatic void *vb_alloc(unsigned long size, gfp_t gfp_mask) 16468c2ecf20Sopenharmony_ci{ 16478c2ecf20Sopenharmony_ci struct vmap_block_queue *vbq; 16488c2ecf20Sopenharmony_ci struct vmap_block *vb; 16498c2ecf20Sopenharmony_ci void *vaddr = NULL; 16508c2ecf20Sopenharmony_ci unsigned int order; 
16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_ci BUG_ON(offset_in_page(size)); 16538c2ecf20Sopenharmony_ci BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); 16548c2ecf20Sopenharmony_ci if (WARN_ON(size == 0)) { 16558c2ecf20Sopenharmony_ci /* 16568c2ecf20Sopenharmony_ci * Allocating 0 bytes isn't what caller wants since 16578c2ecf20Sopenharmony_ci * get_order(0) returns funny result. Just warn and terminate 16588c2ecf20Sopenharmony_ci * early. 16598c2ecf20Sopenharmony_ci */ 16608c2ecf20Sopenharmony_ci return NULL; 16618c2ecf20Sopenharmony_ci } 16628c2ecf20Sopenharmony_ci order = get_order(size); 16638c2ecf20Sopenharmony_ci 16648c2ecf20Sopenharmony_ci rcu_read_lock(); 16658c2ecf20Sopenharmony_ci vbq = &get_cpu_var(vmap_block_queue); 16668c2ecf20Sopenharmony_ci list_for_each_entry_rcu(vb, &vbq->free, free_list) { 16678c2ecf20Sopenharmony_ci unsigned long pages_off; 16688c2ecf20Sopenharmony_ci 16698c2ecf20Sopenharmony_ci spin_lock(&vb->lock); 16708c2ecf20Sopenharmony_ci if (vb->free < (1UL << order)) { 16718c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 16728c2ecf20Sopenharmony_ci continue; 16738c2ecf20Sopenharmony_ci } 16748c2ecf20Sopenharmony_ci 16758c2ecf20Sopenharmony_ci pages_off = VMAP_BBMAP_BITS - vb->free; 16768c2ecf20Sopenharmony_ci vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); 16778c2ecf20Sopenharmony_ci vb->free -= 1UL << order; 16788c2ecf20Sopenharmony_ci if (vb->free == 0) { 16798c2ecf20Sopenharmony_ci spin_lock(&vbq->lock); 16808c2ecf20Sopenharmony_ci list_del_rcu(&vb->free_list); 16818c2ecf20Sopenharmony_ci spin_unlock(&vbq->lock); 16828c2ecf20Sopenharmony_ci } 16838c2ecf20Sopenharmony_ci 16848c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 16858c2ecf20Sopenharmony_ci break; 16868c2ecf20Sopenharmony_ci } 16878c2ecf20Sopenharmony_ci 16888c2ecf20Sopenharmony_ci put_cpu_var(vmap_block_queue); 16898c2ecf20Sopenharmony_ci rcu_read_unlock(); 16908c2ecf20Sopenharmony_ci 16918c2ecf20Sopenharmony_ci /* Allocate new block if nothing was found */ 16928c2ecf20Sopenharmony_ci if (!vaddr) 16938c2ecf20Sopenharmony_ci vaddr = new_vmap_block(order, gfp_mask); 16948c2ecf20Sopenharmony_ci 16958c2ecf20Sopenharmony_ci return vaddr; 16968c2ecf20Sopenharmony_ci} 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_cistatic void vb_free(unsigned long addr, unsigned long size) 16998c2ecf20Sopenharmony_ci{ 17008c2ecf20Sopenharmony_ci unsigned long offset; 17018c2ecf20Sopenharmony_ci unsigned int order; 17028c2ecf20Sopenharmony_ci struct vmap_block *vb; 17038c2ecf20Sopenharmony_ci 17048c2ecf20Sopenharmony_ci BUG_ON(offset_in_page(size)); 17058c2ecf20Sopenharmony_ci BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_ci flush_cache_vunmap(addr, addr + size); 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci order = get_order(size); 17108c2ecf20Sopenharmony_ci offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; 17118c2ecf20Sopenharmony_ci vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr)); 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_ci unmap_kernel_range_noflush(addr, size); 17148c2ecf20Sopenharmony_ci 17158c2ecf20Sopenharmony_ci if (debug_pagealloc_enabled_static()) 17168c2ecf20Sopenharmony_ci flush_tlb_kernel_range(addr, addr + size); 17178c2ecf20Sopenharmony_ci 17188c2ecf20Sopenharmony_ci spin_lock(&vb->lock); 17198c2ecf20Sopenharmony_ci 17208c2ecf20Sopenharmony_ci /* Expand dirty range */ 17218c2ecf20Sopenharmony_ci vb->dirty_min = min(vb->dirty_min, offset); 17228c2ecf20Sopenharmony_ci vb->dirty_max = max(vb->dirty_max, offset + (1UL << 
order)); 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci vb->dirty += 1UL << order; 17258c2ecf20Sopenharmony_ci if (vb->dirty == VMAP_BBMAP_BITS) { 17268c2ecf20Sopenharmony_ci BUG_ON(vb->free); 17278c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 17288c2ecf20Sopenharmony_ci free_vmap_block(vb); 17298c2ecf20Sopenharmony_ci } else 17308c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 17318c2ecf20Sopenharmony_ci} 17328c2ecf20Sopenharmony_ci 17338c2ecf20Sopenharmony_cistatic void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) 17348c2ecf20Sopenharmony_ci{ 17358c2ecf20Sopenharmony_ci int cpu; 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci if (unlikely(!vmap_initialized)) 17388c2ecf20Sopenharmony_ci return; 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci might_sleep(); 17418c2ecf20Sopenharmony_ci 17428c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 17438c2ecf20Sopenharmony_ci struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); 17448c2ecf20Sopenharmony_ci struct vmap_block *vb; 17458c2ecf20Sopenharmony_ci 17468c2ecf20Sopenharmony_ci rcu_read_lock(); 17478c2ecf20Sopenharmony_ci list_for_each_entry_rcu(vb, &vbq->free, free_list) { 17488c2ecf20Sopenharmony_ci spin_lock(&vb->lock); 17498c2ecf20Sopenharmony_ci if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) { 17508c2ecf20Sopenharmony_ci unsigned long va_start = vb->va->va_start; 17518c2ecf20Sopenharmony_ci unsigned long s, e; 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci s = va_start + (vb->dirty_min << PAGE_SHIFT); 17548c2ecf20Sopenharmony_ci e = va_start + (vb->dirty_max << PAGE_SHIFT); 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci start = min(s, start); 17578c2ecf20Sopenharmony_ci end = max(e, end); 17588c2ecf20Sopenharmony_ci 17598c2ecf20Sopenharmony_ci flush = 1; 17608c2ecf20Sopenharmony_ci } 17618c2ecf20Sopenharmony_ci spin_unlock(&vb->lock); 17628c2ecf20Sopenharmony_ci } 17638c2ecf20Sopenharmony_ci rcu_read_unlock(); 17648c2ecf20Sopenharmony_ci } 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci mutex_lock(&vmap_purge_lock); 17678c2ecf20Sopenharmony_ci purge_fragmented_blocks_allcpus(); 17688c2ecf20Sopenharmony_ci if (!__purge_vmap_area_lazy(start, end) && flush) 17698c2ecf20Sopenharmony_ci flush_tlb_kernel_range(start, end); 17708c2ecf20Sopenharmony_ci mutex_unlock(&vmap_purge_lock); 17718c2ecf20Sopenharmony_ci} 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci/** 17748c2ecf20Sopenharmony_ci * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer 17758c2ecf20Sopenharmony_ci * 17768c2ecf20Sopenharmony_ci * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily 17778c2ecf20Sopenharmony_ci * to amortize TLB flushing overheads. What this means is that any page you 17788c2ecf20Sopenharmony_ci * have now, may, in a former life, have been mapped into kernel virtual 17798c2ecf20Sopenharmony_ci * address by the vmap layer and so there might be some CPUs with TLB entries 17808c2ecf20Sopenharmony_ci * still referencing that page (additional to the regular 1:1 kernel mapping). 17818c2ecf20Sopenharmony_ci * 17828c2ecf20Sopenharmony_ci * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can 17838c2ecf20Sopenharmony_ci * be sure that none of the pages we have control over will have any aliases 17848c2ecf20Sopenharmony_ci * from the vmap layer. 
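 *
 * Illustrative use (a sketch, not a prescription): code about to
 * change page attributes can call this first, so that no lazily kept
 * vmap alias survives with the old permissions:
 *
 *	vm_unmap_aliases();
 *	... then proceed to modify the page table attributes ...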
17858c2ecf20Sopenharmony_ci */ 17868c2ecf20Sopenharmony_civoid vm_unmap_aliases(void) 17878c2ecf20Sopenharmony_ci{ 17888c2ecf20Sopenharmony_ci unsigned long start = ULONG_MAX, end = 0; 17898c2ecf20Sopenharmony_ci int flush = 0; 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_ci _vm_unmap_aliases(start, end, flush); 17928c2ecf20Sopenharmony_ci} 17938c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vm_unmap_aliases); 17948c2ecf20Sopenharmony_ci 17958c2ecf20Sopenharmony_ci/** 17968c2ecf20Sopenharmony_ci * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram 17978c2ecf20Sopenharmony_ci * @mem: the pointer returned by vm_map_ram 17988c2ecf20Sopenharmony_ci * @count: the count passed to that vm_map_ram call (cannot unmap partial) 17998c2ecf20Sopenharmony_ci */ 18008c2ecf20Sopenharmony_civoid vm_unmap_ram(const void *mem, unsigned int count) 18018c2ecf20Sopenharmony_ci{ 18028c2ecf20Sopenharmony_ci unsigned long size = (unsigned long)count << PAGE_SHIFT; 18038c2ecf20Sopenharmony_ci unsigned long addr = (unsigned long)mem; 18048c2ecf20Sopenharmony_ci struct vmap_area *va; 18058c2ecf20Sopenharmony_ci 18068c2ecf20Sopenharmony_ci might_sleep(); 18078c2ecf20Sopenharmony_ci BUG_ON(!addr); 18088c2ecf20Sopenharmony_ci BUG_ON(addr < VMALLOC_START); 18098c2ecf20Sopenharmony_ci BUG_ON(addr > VMALLOC_END); 18108c2ecf20Sopenharmony_ci BUG_ON(!PAGE_ALIGNED(addr)); 18118c2ecf20Sopenharmony_ci 18128c2ecf20Sopenharmony_ci kasan_poison_vmalloc(mem, size); 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci if (likely(count <= VMAP_MAX_ALLOC)) { 18158c2ecf20Sopenharmony_ci debug_check_no_locks_freed(mem, size); 18168c2ecf20Sopenharmony_ci vb_free(addr, size); 18178c2ecf20Sopenharmony_ci return; 18188c2ecf20Sopenharmony_ci } 18198c2ecf20Sopenharmony_ci 18208c2ecf20Sopenharmony_ci va = find_vmap_area(addr); 18218c2ecf20Sopenharmony_ci BUG_ON(!va); 18228c2ecf20Sopenharmony_ci debug_check_no_locks_freed((void *)va->va_start, 18238c2ecf20Sopenharmony_ci (va->va_end - va->va_start)); 18248c2ecf20Sopenharmony_ci free_unmap_vmap_area(va); 18258c2ecf20Sopenharmony_ci} 18268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vm_unmap_ram); 18278c2ecf20Sopenharmony_ci 18288c2ecf20Sopenharmony_ci/** 18298c2ecf20Sopenharmony_ci * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) 18308c2ecf20Sopenharmony_ci * @pages: an array of pointers to the pages to be mapped 18318c2ecf20Sopenharmony_ci * @count: number of pages 18328c2ecf20Sopenharmony_ci * @node: prefer to allocate data structures on this node 18338c2ecf20Sopenharmony_ci * 18348c2ecf20Sopenharmony_ci * If you use this function for less than VMAP_MAX_ALLOC pages, it could be 18358c2ecf20Sopenharmony_ci * faster than vmap so it's good. But if you mix long-life and short-life 18368c2ecf20Sopenharmony_ci * objects with vm_map_ram(), it could consume lots of address space through 18378c2ecf20Sopenharmony_ci * fragmentation (especially on a 32bit machine). You could see failures in 18388c2ecf20Sopenharmony_ci * the end. Please use this function for short-lived objects. 
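 *
 * Example (an illustrative sketch; error handling elided):
 *
 *	void *buf = vm_map_ram(pages, nr, NUMA_NO_NODE);
 *	if (buf) {
 *		... use buf briefly ...
 *		vm_unmap_ram(buf, nr);
 *	}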
18398c2ecf20Sopenharmony_ci * 18408c2ecf20Sopenharmony_ci * Returns: a pointer to the address that has been mapped, or %NULL on failure 18418c2ecf20Sopenharmony_ci */ 18428c2ecf20Sopenharmony_civoid *vm_map_ram(struct page **pages, unsigned int count, int node) 18438c2ecf20Sopenharmony_ci{ 18448c2ecf20Sopenharmony_ci unsigned long size = (unsigned long)count << PAGE_SHIFT; 18458c2ecf20Sopenharmony_ci unsigned long addr; 18468c2ecf20Sopenharmony_ci void *mem; 18478c2ecf20Sopenharmony_ci 18488c2ecf20Sopenharmony_ci if (likely(count <= VMAP_MAX_ALLOC)) { 18498c2ecf20Sopenharmony_ci mem = vb_alloc(size, GFP_KERNEL); 18508c2ecf20Sopenharmony_ci if (IS_ERR(mem)) 18518c2ecf20Sopenharmony_ci return NULL; 18528c2ecf20Sopenharmony_ci addr = (unsigned long)mem; 18538c2ecf20Sopenharmony_ci } else { 18548c2ecf20Sopenharmony_ci struct vmap_area *va; 18558c2ecf20Sopenharmony_ci va = alloc_vmap_area(size, PAGE_SIZE, 18568c2ecf20Sopenharmony_ci VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); 18578c2ecf20Sopenharmony_ci if (IS_ERR(va)) 18588c2ecf20Sopenharmony_ci return NULL; 18598c2ecf20Sopenharmony_ci 18608c2ecf20Sopenharmony_ci addr = va->va_start; 18618c2ecf20Sopenharmony_ci mem = (void *)addr; 18628c2ecf20Sopenharmony_ci } 18638c2ecf20Sopenharmony_ci 18648c2ecf20Sopenharmony_ci kasan_unpoison_vmalloc(mem, size); 18658c2ecf20Sopenharmony_ci 18668c2ecf20Sopenharmony_ci if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) { 18678c2ecf20Sopenharmony_ci vm_unmap_ram(mem, count); 18688c2ecf20Sopenharmony_ci return NULL; 18698c2ecf20Sopenharmony_ci } 18708c2ecf20Sopenharmony_ci return mem; 18718c2ecf20Sopenharmony_ci} 18728c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vm_map_ram); 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_cistatic struct vm_struct *vmlist __initdata; 18758c2ecf20Sopenharmony_ci 18768c2ecf20Sopenharmony_ci/** 18778c2ecf20Sopenharmony_ci * vm_area_add_early - add vmap area early during boot 18788c2ecf20Sopenharmony_ci * @vm: vm_struct to add 18798c2ecf20Sopenharmony_ci * 18808c2ecf20Sopenharmony_ci * This function is used to add fixed kernel vm area to vmlist before 18818c2ecf20Sopenharmony_ci * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags 18828c2ecf20Sopenharmony_ci * should contain proper values and the other fields should be zero. 18838c2ecf20Sopenharmony_ci * 18848c2ecf20Sopenharmony_ci * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. 
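 *
 * Sketch of intended early-boot use (illustrative only; the address
 * and size are made up, not a real platform's values):
 *
 *	static struct vm_struct early_area;
 *	early_area.addr  = (void *)0xf0000000;
 *	early_area.size  = SZ_1M;
 *	early_area.flags = VM_IOREMAP;
 *	vm_area_add_early(&early_area);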
18858c2ecf20Sopenharmony_ci */ 18868c2ecf20Sopenharmony_civoid __init vm_area_add_early(struct vm_struct *vm) 18878c2ecf20Sopenharmony_ci{ 18888c2ecf20Sopenharmony_ci struct vm_struct *tmp, **p; 18898c2ecf20Sopenharmony_ci 18908c2ecf20Sopenharmony_ci BUG_ON(vmap_initialized); 18918c2ecf20Sopenharmony_ci for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { 18928c2ecf20Sopenharmony_ci if (tmp->addr >= vm->addr) { 18938c2ecf20Sopenharmony_ci BUG_ON(tmp->addr < vm->addr + vm->size); 18948c2ecf20Sopenharmony_ci break; 18958c2ecf20Sopenharmony_ci } else 18968c2ecf20Sopenharmony_ci BUG_ON(tmp->addr + tmp->size > vm->addr); 18978c2ecf20Sopenharmony_ci } 18988c2ecf20Sopenharmony_ci vm->next = *p; 18998c2ecf20Sopenharmony_ci *p = vm; 19008c2ecf20Sopenharmony_ci} 19018c2ecf20Sopenharmony_ci 19028c2ecf20Sopenharmony_ci/** 19038c2ecf20Sopenharmony_ci * vm_area_register_early - register vmap area early during boot 19048c2ecf20Sopenharmony_ci * @vm: vm_struct to register 19058c2ecf20Sopenharmony_ci * @align: requested alignment 19068c2ecf20Sopenharmony_ci * 19078c2ecf20Sopenharmony_ci * This function is used to register kernel vm area before 19088c2ecf20Sopenharmony_ci * vmalloc_init() is called. @vm->size and @vm->flags should contain 19098c2ecf20Sopenharmony_ci * proper values on entry and other fields should be zero. On return, 19108c2ecf20Sopenharmony_ci * vm->addr contains the allocated address. 19118c2ecf20Sopenharmony_ci * 19128c2ecf20Sopenharmony_ci * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. 19138c2ecf20Sopenharmony_ci */ 19148c2ecf20Sopenharmony_civoid __init vm_area_register_early(struct vm_struct *vm, size_t align) 19158c2ecf20Sopenharmony_ci{ 19168c2ecf20Sopenharmony_ci static size_t vm_init_off __initdata; 19178c2ecf20Sopenharmony_ci unsigned long addr; 19188c2ecf20Sopenharmony_ci 19198c2ecf20Sopenharmony_ci addr = ALIGN(VMALLOC_START + vm_init_off, align); 19208c2ecf20Sopenharmony_ci vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; 19218c2ecf20Sopenharmony_ci 19228c2ecf20Sopenharmony_ci vm->addr = (void *)addr; 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci vm_area_add_early(vm); 19258c2ecf20Sopenharmony_ci} 19268c2ecf20Sopenharmony_ci 19278c2ecf20Sopenharmony_cistatic void vmap_init_free_space(void) 19288c2ecf20Sopenharmony_ci{ 19298c2ecf20Sopenharmony_ci unsigned long vmap_start = 1; 19308c2ecf20Sopenharmony_ci const unsigned long vmap_end = ULONG_MAX; 19318c2ecf20Sopenharmony_ci struct vmap_area *busy, *free; 19328c2ecf20Sopenharmony_ci 19338c2ecf20Sopenharmony_ci /* 19348c2ecf20Sopenharmony_ci * B F B B B F 19358c2ecf20Sopenharmony_ci * -|-----|.....|-----|-----|-----|.....|- 19368c2ecf20Sopenharmony_ci * | The KVA space | 19378c2ecf20Sopenharmony_ci * |<--------------------------------->| 19388c2ecf20Sopenharmony_ci */ 19398c2ecf20Sopenharmony_ci list_for_each_entry(busy, &vmap_area_list, list) { 19408c2ecf20Sopenharmony_ci if (busy->va_start - vmap_start > 0) { 19418c2ecf20Sopenharmony_ci free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); 19428c2ecf20Sopenharmony_ci if (!WARN_ON_ONCE(!free)) { 19438c2ecf20Sopenharmony_ci free->va_start = vmap_start; 19448c2ecf20Sopenharmony_ci free->va_end = busy->va_start; 19458c2ecf20Sopenharmony_ci 19468c2ecf20Sopenharmony_ci insert_vmap_area_augment(free, NULL, 19478c2ecf20Sopenharmony_ci &free_vmap_area_root, 19488c2ecf20Sopenharmony_ci &free_vmap_area_list); 19498c2ecf20Sopenharmony_ci } 19508c2ecf20Sopenharmony_ci } 19518c2ecf20Sopenharmony_ci 19528c2ecf20Sopenharmony_ci vmap_start = busy->va_end; 
19538c2ecf20Sopenharmony_ci } 19548c2ecf20Sopenharmony_ci 19558c2ecf20Sopenharmony_ci if (vmap_end - vmap_start > 0) { 19568c2ecf20Sopenharmony_ci free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); 19578c2ecf20Sopenharmony_ci if (!WARN_ON_ONCE(!free)) { 19588c2ecf20Sopenharmony_ci free->va_start = vmap_start; 19598c2ecf20Sopenharmony_ci free->va_end = vmap_end; 19608c2ecf20Sopenharmony_ci 19618c2ecf20Sopenharmony_ci insert_vmap_area_augment(free, NULL, 19628c2ecf20Sopenharmony_ci &free_vmap_area_root, 19638c2ecf20Sopenharmony_ci &free_vmap_area_list); 19648c2ecf20Sopenharmony_ci } 19658c2ecf20Sopenharmony_ci } 19668c2ecf20Sopenharmony_ci} 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_civoid __init vmalloc_init(void) 19698c2ecf20Sopenharmony_ci{ 19708c2ecf20Sopenharmony_ci struct vmap_area *va; 19718c2ecf20Sopenharmony_ci struct vm_struct *tmp; 19728c2ecf20Sopenharmony_ci int i; 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_ci /* 19758c2ecf20Sopenharmony_ci * Create the cache for vmap_area objects. 19768c2ecf20Sopenharmony_ci */ 19778c2ecf20Sopenharmony_ci vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC); 19788c2ecf20Sopenharmony_ci 19798c2ecf20Sopenharmony_ci for_each_possible_cpu(i) { 19808c2ecf20Sopenharmony_ci struct vmap_block_queue *vbq; 19818c2ecf20Sopenharmony_ci struct vfree_deferred *p; 19828c2ecf20Sopenharmony_ci 19838c2ecf20Sopenharmony_ci vbq = &per_cpu(vmap_block_queue, i); 19848c2ecf20Sopenharmony_ci spin_lock_init(&vbq->lock); 19858c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&vbq->free); 19868c2ecf20Sopenharmony_ci p = &per_cpu(vfree_deferred, i); 19878c2ecf20Sopenharmony_ci init_llist_head(&p->list); 19888c2ecf20Sopenharmony_ci INIT_WORK(&p->wq, free_work); 19898c2ecf20Sopenharmony_ci } 19908c2ecf20Sopenharmony_ci 19918c2ecf20Sopenharmony_ci /* Import existing vmlist entries. */ 19928c2ecf20Sopenharmony_ci for (tmp = vmlist; tmp; tmp = tmp->next) { 19938c2ecf20Sopenharmony_ci va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); 19948c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!va)) 19958c2ecf20Sopenharmony_ci continue; 19968c2ecf20Sopenharmony_ci 19978c2ecf20Sopenharmony_ci va->va_start = (unsigned long)tmp->addr; 19988c2ecf20Sopenharmony_ci va->va_end = va->va_start + tmp->size; 19998c2ecf20Sopenharmony_ci va->vm = tmp; 20008c2ecf20Sopenharmony_ci insert_vmap_area(va, &vmap_area_root, &vmap_area_list); 20018c2ecf20Sopenharmony_ci } 20028c2ecf20Sopenharmony_ci 20038c2ecf20Sopenharmony_ci /* 20048c2ecf20Sopenharmony_ci * Now we can initialize a free vmap space. 20058c2ecf20Sopenharmony_ci */ 20068c2ecf20Sopenharmony_ci vmap_init_free_space(); 20078c2ecf20Sopenharmony_ci vmap_initialized = true; 20088c2ecf20Sopenharmony_ci} 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci/** 20118c2ecf20Sopenharmony_ci * unmap_kernel_range - unmap kernel VM area and flush cache and TLB 20128c2ecf20Sopenharmony_ci * @addr: start of the VM area to unmap 20138c2ecf20Sopenharmony_ci * @size: size of the VM area to unmap 20148c2ecf20Sopenharmony_ci * 20158c2ecf20Sopenharmony_ci * Similar to unmap_kernel_range_noflush() but flushes vcache before 20168c2ecf20Sopenharmony_ci * the unmapping and tlb after. 
20178c2ecf20Sopenharmony_ci */ 20188c2ecf20Sopenharmony_civoid unmap_kernel_range(unsigned long addr, unsigned long size) 20198c2ecf20Sopenharmony_ci{ 20208c2ecf20Sopenharmony_ci unsigned long end = addr + size; 20218c2ecf20Sopenharmony_ci 20228c2ecf20Sopenharmony_ci flush_cache_vunmap(addr, end); 20238c2ecf20Sopenharmony_ci unmap_kernel_range_noflush(addr, size); 20248c2ecf20Sopenharmony_ci flush_tlb_kernel_range(addr, end); 20258c2ecf20Sopenharmony_ci} 20268c2ecf20Sopenharmony_ci 20278c2ecf20Sopenharmony_cistatic inline void setup_vmalloc_vm_locked(struct vm_struct *vm, 20288c2ecf20Sopenharmony_ci struct vmap_area *va, unsigned long flags, const void *caller) 20298c2ecf20Sopenharmony_ci{ 20308c2ecf20Sopenharmony_ci vm->flags = flags; 20318c2ecf20Sopenharmony_ci vm->addr = (void *)va->va_start; 20328c2ecf20Sopenharmony_ci vm->size = va->va_end - va->va_start; 20338c2ecf20Sopenharmony_ci vm->caller = caller; 20348c2ecf20Sopenharmony_ci va->vm = vm; 20358c2ecf20Sopenharmony_ci} 20368c2ecf20Sopenharmony_ci 20378c2ecf20Sopenharmony_cistatic void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, 20388c2ecf20Sopenharmony_ci unsigned long flags, const void *caller) 20398c2ecf20Sopenharmony_ci{ 20408c2ecf20Sopenharmony_ci spin_lock(&vmap_area_lock); 20418c2ecf20Sopenharmony_ci setup_vmalloc_vm_locked(vm, va, flags, caller); 20428c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 20438c2ecf20Sopenharmony_ci} 20448c2ecf20Sopenharmony_ci 20458c2ecf20Sopenharmony_cistatic void clear_vm_uninitialized_flag(struct vm_struct *vm) 20468c2ecf20Sopenharmony_ci{ 20478c2ecf20Sopenharmony_ci /* 20488c2ecf20Sopenharmony_ci * Before removing VM_UNINITIALIZED, 20498c2ecf20Sopenharmony_ci * we should make sure that vm has proper values. 20508c2ecf20Sopenharmony_ci * Pair with smp_rmb() in show_numa_info(). 
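	 *
	 * Schematically (illustrative), the pairing looks like:
	 *
	 *	writer (here):			reader (show_numa_info()):
	 *	  vm->addr = ...;		  if (!(vm->flags & VM_UNINITIALIZED)) {
	 *	  smp_wmb();			          smp_rmb();
	 *	  vm->flags &= ~VM_UNINITIALIZED;         ... vm fields are now valid ...
	 *					  }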
20518c2ecf20Sopenharmony_ci	 */ 20528c2ecf20Sopenharmony_ci	smp_wmb(); 20538c2ecf20Sopenharmony_ci	vm->flags &= ~VM_UNINITIALIZED; 20548c2ecf20Sopenharmony_ci} 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_cistatic struct vm_struct *__get_vm_area_node(unsigned long size, 20578c2ecf20Sopenharmony_ci	unsigned long align, unsigned long flags, unsigned long start, 20588c2ecf20Sopenharmony_ci	unsigned long end, int node, gfp_t gfp_mask, const void *caller) 20598c2ecf20Sopenharmony_ci{ 20608c2ecf20Sopenharmony_ci	struct vmap_area *va; 20618c2ecf20Sopenharmony_ci	struct vm_struct *area; 20628c2ecf20Sopenharmony_ci	unsigned long requested_size = size; 20638c2ecf20Sopenharmony_ci 20648c2ecf20Sopenharmony_ci	BUG_ON(in_interrupt()); 20658c2ecf20Sopenharmony_ci	size = PAGE_ALIGN(size); 20668c2ecf20Sopenharmony_ci	if (unlikely(!size)) 20678c2ecf20Sopenharmony_ci	return NULL; 20688c2ecf20Sopenharmony_ci 20698c2ecf20Sopenharmony_ci	if (flags & VM_IOREMAP) 20708c2ecf20Sopenharmony_ci	align = 1ul << clamp_t(int, get_count_order_long(size), 20718c2ecf20Sopenharmony_ci	PAGE_SHIFT, IOREMAP_MAX_ORDER); 20728c2ecf20Sopenharmony_ci 20738c2ecf20Sopenharmony_ci	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); 20748c2ecf20Sopenharmony_ci	if (unlikely(!area)) 20758c2ecf20Sopenharmony_ci	return NULL; 20768c2ecf20Sopenharmony_ci 20778c2ecf20Sopenharmony_ci	if (!(flags & VM_NO_GUARD)) 20788c2ecf20Sopenharmony_ci	size += PAGE_SIZE; 20798c2ecf20Sopenharmony_ci 20808c2ecf20Sopenharmony_ci	va = alloc_vmap_area(size, align, start, end, node, gfp_mask); 20818c2ecf20Sopenharmony_ci	if (IS_ERR(va)) { 20828c2ecf20Sopenharmony_ci	kfree(area); 20838c2ecf20Sopenharmony_ci	return NULL; 20848c2ecf20Sopenharmony_ci	} 20858c2ecf20Sopenharmony_ci 20868c2ecf20Sopenharmony_ci	kasan_unpoison_vmalloc((void *)va->va_start, requested_size); 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci	setup_vmalloc_vm(area, va, flags, caller); 20898c2ecf20Sopenharmony_ci 20908c2ecf20Sopenharmony_ci	return area; 20918c2ecf20Sopenharmony_ci} 20928c2ecf20Sopenharmony_ci 20938c2ecf20Sopenharmony_cistruct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, 20948c2ecf20Sopenharmony_ci	unsigned long start, unsigned long end, 20958c2ecf20Sopenharmony_ci	const void *caller) 20968c2ecf20Sopenharmony_ci{ 20978c2ecf20Sopenharmony_ci	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, 20988c2ecf20Sopenharmony_ci	GFP_KERNEL, caller); 20998c2ecf20Sopenharmony_ci} 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ci/** 21028c2ecf20Sopenharmony_ci * get_vm_area - reserve a contiguous kernel virtual area 21038c2ecf20Sopenharmony_ci * @size: size of the area 21048c2ecf20Sopenharmony_ci * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC 21058c2ecf20Sopenharmony_ci * 21068c2ecf20Sopenharmony_ci * Search for an area of @size in the kernel virtual mapping area, 21078c2ecf20Sopenharmony_ci * and reserve it for our purposes. Returns the area descriptor 21088c2ecf20Sopenharmony_ci * on success or %NULL on failure. 21098c2ecf20Sopenharmony_ci * 21108c2ecf20Sopenharmony_ci * Return: the area descriptor on success or %NULL on failure. 
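 *
 * Example (an illustrative ioremap-style use):
 *
 *	struct vm_struct *area = get_vm_area(size, VM_IOREMAP);
 *	if (!area)
 *		return NULL;
 *	... map the physical range at area->addr ...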
21118c2ecf20Sopenharmony_ci */ 21128c2ecf20Sopenharmony_cistruct vm_struct *get_vm_area(unsigned long size, unsigned long flags) 21138c2ecf20Sopenharmony_ci{ 21148c2ecf20Sopenharmony_ci return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, 21158c2ecf20Sopenharmony_ci NUMA_NO_NODE, GFP_KERNEL, 21168c2ecf20Sopenharmony_ci __builtin_return_address(0)); 21178c2ecf20Sopenharmony_ci} 21188c2ecf20Sopenharmony_ci 21198c2ecf20Sopenharmony_cistruct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, 21208c2ecf20Sopenharmony_ci const void *caller) 21218c2ecf20Sopenharmony_ci{ 21228c2ecf20Sopenharmony_ci return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, 21238c2ecf20Sopenharmony_ci NUMA_NO_NODE, GFP_KERNEL, caller); 21248c2ecf20Sopenharmony_ci} 21258c2ecf20Sopenharmony_ci 21268c2ecf20Sopenharmony_ci/** 21278c2ecf20Sopenharmony_ci * find_vm_area - find a continuous kernel virtual area 21288c2ecf20Sopenharmony_ci * @addr: base address 21298c2ecf20Sopenharmony_ci * 21308c2ecf20Sopenharmony_ci * Search for the kernel VM area starting at @addr, and return it. 21318c2ecf20Sopenharmony_ci * It is up to the caller to do all required locking to keep the returned 21328c2ecf20Sopenharmony_ci * pointer valid. 21338c2ecf20Sopenharmony_ci * 21348c2ecf20Sopenharmony_ci * Return: the area descriptor on success or %NULL on failure. 21358c2ecf20Sopenharmony_ci */ 21368c2ecf20Sopenharmony_cistruct vm_struct *find_vm_area(const void *addr) 21378c2ecf20Sopenharmony_ci{ 21388c2ecf20Sopenharmony_ci struct vmap_area *va; 21398c2ecf20Sopenharmony_ci 21408c2ecf20Sopenharmony_ci va = find_vmap_area((unsigned long)addr); 21418c2ecf20Sopenharmony_ci if (!va) 21428c2ecf20Sopenharmony_ci return NULL; 21438c2ecf20Sopenharmony_ci 21448c2ecf20Sopenharmony_ci return va->vm; 21458c2ecf20Sopenharmony_ci} 21468c2ecf20Sopenharmony_ci 21478c2ecf20Sopenharmony_ci/** 21488c2ecf20Sopenharmony_ci * remove_vm_area - find and remove a continuous kernel virtual area 21498c2ecf20Sopenharmony_ci * @addr: base address 21508c2ecf20Sopenharmony_ci * 21518c2ecf20Sopenharmony_ci * Search for the kernel VM area starting at @addr, and remove it. 21528c2ecf20Sopenharmony_ci * This function returns the found VM area, but using it is NOT safe 21538c2ecf20Sopenharmony_ci * on SMP machines, except for its size or flags. 21548c2ecf20Sopenharmony_ci * 21558c2ecf20Sopenharmony_ci * Return: the area descriptor on success or %NULL on failure. 
21568c2ecf20Sopenharmony_ci */ 21578c2ecf20Sopenharmony_cistruct vm_struct *remove_vm_area(const void *addr) 21588c2ecf20Sopenharmony_ci{ 21598c2ecf20Sopenharmony_ci struct vmap_area *va; 21608c2ecf20Sopenharmony_ci 21618c2ecf20Sopenharmony_ci might_sleep(); 21628c2ecf20Sopenharmony_ci 21638c2ecf20Sopenharmony_ci spin_lock(&vmap_area_lock); 21648c2ecf20Sopenharmony_ci va = __find_vmap_area((unsigned long)addr); 21658c2ecf20Sopenharmony_ci if (va && va->vm) { 21668c2ecf20Sopenharmony_ci struct vm_struct *vm = va->vm; 21678c2ecf20Sopenharmony_ci 21688c2ecf20Sopenharmony_ci va->vm = NULL; 21698c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 21708c2ecf20Sopenharmony_ci 21718c2ecf20Sopenharmony_ci kasan_free_shadow(vm); 21728c2ecf20Sopenharmony_ci free_unmap_vmap_area(va); 21738c2ecf20Sopenharmony_ci 21748c2ecf20Sopenharmony_ci return vm; 21758c2ecf20Sopenharmony_ci } 21768c2ecf20Sopenharmony_ci 21778c2ecf20Sopenharmony_ci spin_unlock(&vmap_area_lock); 21788c2ecf20Sopenharmony_ci return NULL; 21798c2ecf20Sopenharmony_ci} 21808c2ecf20Sopenharmony_ci 21818c2ecf20Sopenharmony_cistatic inline void set_area_direct_map(const struct vm_struct *area, 21828c2ecf20Sopenharmony_ci int (*set_direct_map)(struct page *page)) 21838c2ecf20Sopenharmony_ci{ 21848c2ecf20Sopenharmony_ci int i; 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci for (i = 0; i < area->nr_pages; i++) 21878c2ecf20Sopenharmony_ci if (page_address(area->pages[i])) 21888c2ecf20Sopenharmony_ci set_direct_map(area->pages[i]); 21898c2ecf20Sopenharmony_ci} 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_ci/* Handle removing and resetting vm mappings related to the vm_struct. */ 21928c2ecf20Sopenharmony_cistatic void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) 21938c2ecf20Sopenharmony_ci{ 21948c2ecf20Sopenharmony_ci unsigned long start = ULONG_MAX, end = 0; 21958c2ecf20Sopenharmony_ci int flush_reset = area->flags & VM_FLUSH_RESET_PERMS; 21968c2ecf20Sopenharmony_ci int flush_dmap = 0; 21978c2ecf20Sopenharmony_ci int i; 21988c2ecf20Sopenharmony_ci 21998c2ecf20Sopenharmony_ci remove_vm_area(area->addr); 22008c2ecf20Sopenharmony_ci 22018c2ecf20Sopenharmony_ci /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ 22028c2ecf20Sopenharmony_ci if (!flush_reset) 22038c2ecf20Sopenharmony_ci return; 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci /* 22068c2ecf20Sopenharmony_ci * If not deallocating pages, just do the flush of the VM area and 22078c2ecf20Sopenharmony_ci * return. 22088c2ecf20Sopenharmony_ci */ 22098c2ecf20Sopenharmony_ci if (!deallocate_pages) { 22108c2ecf20Sopenharmony_ci vm_unmap_aliases(); 22118c2ecf20Sopenharmony_ci return; 22128c2ecf20Sopenharmony_ci } 22138c2ecf20Sopenharmony_ci 22148c2ecf20Sopenharmony_ci /* 22158c2ecf20Sopenharmony_ci * If execution gets here, flush the vm mapping and reset the direct 22168c2ecf20Sopenharmony_ci * map. Find the start and end range of the direct mappings to make sure 22178c2ecf20Sopenharmony_ci * the vm_unmap_aliases() flush includes the direct map. 
22188c2ecf20Sopenharmony_ci */ 22198c2ecf20Sopenharmony_ci for (i = 0; i < area->nr_pages; i++) { 22208c2ecf20Sopenharmony_ci unsigned long addr = (unsigned long)page_address(area->pages[i]); 22218c2ecf20Sopenharmony_ci if (addr) { 22228c2ecf20Sopenharmony_ci start = min(addr, start); 22238c2ecf20Sopenharmony_ci end = max(addr + PAGE_SIZE, end); 22248c2ecf20Sopenharmony_ci flush_dmap = 1; 22258c2ecf20Sopenharmony_ci } 22268c2ecf20Sopenharmony_ci } 22278c2ecf20Sopenharmony_ci 22288c2ecf20Sopenharmony_ci /* 22298c2ecf20Sopenharmony_ci * Set direct map to something invalid so that it won't be cached if 22308c2ecf20Sopenharmony_ci * there are any accesses after the TLB flush, then flush the TLB and 22318c2ecf20Sopenharmony_ci * reset the direct map permissions to the default. 22328c2ecf20Sopenharmony_ci */ 22338c2ecf20Sopenharmony_ci set_area_direct_map(area, set_direct_map_invalid_noflush); 22348c2ecf20Sopenharmony_ci _vm_unmap_aliases(start, end, flush_dmap); 22358c2ecf20Sopenharmony_ci set_area_direct_map(area, set_direct_map_default_noflush); 22368c2ecf20Sopenharmony_ci} 22378c2ecf20Sopenharmony_ci 22388c2ecf20Sopenharmony_cistatic void __vunmap(const void *addr, int deallocate_pages) 22398c2ecf20Sopenharmony_ci{ 22408c2ecf20Sopenharmony_ci struct vm_struct *area; 22418c2ecf20Sopenharmony_ci 22428c2ecf20Sopenharmony_ci if (!addr) 22438c2ecf20Sopenharmony_ci return; 22448c2ecf20Sopenharmony_ci 22458c2ecf20Sopenharmony_ci if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", 22468c2ecf20Sopenharmony_ci addr)) 22478c2ecf20Sopenharmony_ci return; 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci area = find_vm_area(addr); 22508c2ecf20Sopenharmony_ci if (unlikely(!area)) { 22518c2ecf20Sopenharmony_ci WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", 22528c2ecf20Sopenharmony_ci addr); 22538c2ecf20Sopenharmony_ci return; 22548c2ecf20Sopenharmony_ci } 22558c2ecf20Sopenharmony_ci 22568c2ecf20Sopenharmony_ci debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); 22578c2ecf20Sopenharmony_ci debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); 22588c2ecf20Sopenharmony_ci 22598c2ecf20Sopenharmony_ci kasan_poison_vmalloc(area->addr, get_vm_area_size(area)); 22608c2ecf20Sopenharmony_ci 22618c2ecf20Sopenharmony_ci vm_remove_mappings(area, deallocate_pages); 22628c2ecf20Sopenharmony_ci 22638c2ecf20Sopenharmony_ci if (deallocate_pages) { 22648c2ecf20Sopenharmony_ci int i; 22658c2ecf20Sopenharmony_ci 22668c2ecf20Sopenharmony_ci for (i = 0; i < area->nr_pages; i++) { 22678c2ecf20Sopenharmony_ci struct page *page = area->pages[i]; 22688c2ecf20Sopenharmony_ci 22698c2ecf20Sopenharmony_ci BUG_ON(!page); 22708c2ecf20Sopenharmony_ci __free_pages(page, 0); 22718c2ecf20Sopenharmony_ci } 22728c2ecf20Sopenharmony_ci atomic_long_sub(area->nr_pages, &nr_vmalloc_pages); 22738c2ecf20Sopenharmony_ci 22748c2ecf20Sopenharmony_ci kvfree(area->pages); 22758c2ecf20Sopenharmony_ci } 22768c2ecf20Sopenharmony_ci 22778c2ecf20Sopenharmony_ci kfree(area); 22788c2ecf20Sopenharmony_ci return; 22798c2ecf20Sopenharmony_ci} 22808c2ecf20Sopenharmony_ci 22818c2ecf20Sopenharmony_cistatic inline void __vfree_deferred(const void *addr) 22828c2ecf20Sopenharmony_ci{ 22838c2ecf20Sopenharmony_ci /* 22848c2ecf20Sopenharmony_ci * Use raw_cpu_ptr() because this can be called from preemptible 22858c2ecf20Sopenharmony_ci * context. 
 * Preemption is absolutely fine here, because the llist_add()
22868c2ecf20Sopenharmony_ci * implementation is lockless, so it works even if we are adding to
22878c2ecf20Sopenharmony_ci * another cpu's list. schedule_work() should be fine with this too.
22888c2ecf20Sopenharmony_ci */
22898c2ecf20Sopenharmony_ci	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
22908c2ecf20Sopenharmony_ci
22918c2ecf20Sopenharmony_ci	if (llist_add((struct llist_node *)addr, &p->list))
22928c2ecf20Sopenharmony_ci		schedule_work(&p->wq);
22938c2ecf20Sopenharmony_ci}
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_ci/**
22968c2ecf20Sopenharmony_ci * vfree_atomic - release memory allocated by vmalloc()
22978c2ecf20Sopenharmony_ci * @addr: memory base address
22988c2ecf20Sopenharmony_ci *
22998c2ecf20Sopenharmony_ci * This one is just like vfree() but can be called in any atomic context
23008c2ecf20Sopenharmony_ci * except NMIs.
23018c2ecf20Sopenharmony_ci */
23028c2ecf20Sopenharmony_civoid vfree_atomic(const void *addr)
23038c2ecf20Sopenharmony_ci{
23048c2ecf20Sopenharmony_ci	BUG_ON(in_nmi());
23058c2ecf20Sopenharmony_ci
23068c2ecf20Sopenharmony_ci	kmemleak_free(addr);
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	if (!addr)
23098c2ecf20Sopenharmony_ci		return;
23108c2ecf20Sopenharmony_ci	__vfree_deferred(addr);
23118c2ecf20Sopenharmony_ci}
23128c2ecf20Sopenharmony_ci
23138c2ecf20Sopenharmony_cistatic void __vfree(const void *addr)
23148c2ecf20Sopenharmony_ci{
23158c2ecf20Sopenharmony_ci	if (unlikely(in_interrupt()))
23168c2ecf20Sopenharmony_ci		__vfree_deferred(addr);
23178c2ecf20Sopenharmony_ci	else
23188c2ecf20Sopenharmony_ci		__vunmap(addr, 1);
23198c2ecf20Sopenharmony_ci}
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci/**
23228c2ecf20Sopenharmony_ci * vfree - Release memory allocated by vmalloc()
23238c2ecf20Sopenharmony_ci * @addr: Memory base address
23248c2ecf20Sopenharmony_ci *
23258c2ecf20Sopenharmony_ci * Free the virtually contiguous memory area starting at @addr, as obtained
23268c2ecf20Sopenharmony_ci * from one of the vmalloc() family of APIs. This will usually also free the
23278c2ecf20Sopenharmony_ci * physical memory underlying the virtual allocation, but that memory is
23288c2ecf20Sopenharmony_ci * reference counted, so it will not be freed until the last user goes away.
23298c2ecf20Sopenharmony_ci *
23308c2ecf20Sopenharmony_ci * If @addr is NULL, no operation is performed.
23318c2ecf20Sopenharmony_ci *
23328c2ecf20Sopenharmony_ci * Context:
23338c2ecf20Sopenharmony_ci * May sleep if called *not* from interrupt context.
23348c2ecf20Sopenharmony_ci * Must not be called in NMI context (strictly speaking, it could be
23358c2ecf20Sopenharmony_ci * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
23368c2ecf20Sopenharmony_ci * conventions for vfree() arch-dependent would be a really bad idea).
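 *
 * Illustrative usage (an editor's sketch, not part of this file; the
 * function name and size are invented):
 *
 *	static void vfree_example(void)
 *	{
 *		char *buf = vmalloc(1 << 20);	// 1 MiB, may sleep
 *
 *		if (!buf)
 *			return;
 *		memset(buf, 0, 1 << 20);	// use the buffer
 *		vfree(buf);			// vfree(NULL) would be a no-op
 *	}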
23378c2ecf20Sopenharmony_ci */
23388c2ecf20Sopenharmony_civoid vfree(const void *addr)
23398c2ecf20Sopenharmony_ci{
23408c2ecf20Sopenharmony_ci	BUG_ON(in_nmi());
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ci	kmemleak_free(addr);
23438c2ecf20Sopenharmony_ci
23448c2ecf20Sopenharmony_ci	might_sleep_if(!in_interrupt());
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	if (!addr)
23478c2ecf20Sopenharmony_ci		return;
23488c2ecf20Sopenharmony_ci
23498c2ecf20Sopenharmony_ci	__vfree(addr);
23508c2ecf20Sopenharmony_ci}
23518c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vfree);
23528c2ecf20Sopenharmony_ci
23538c2ecf20Sopenharmony_ci/**
23548c2ecf20Sopenharmony_ci * vunmap - release virtual mapping obtained by vmap()
23558c2ecf20Sopenharmony_ci * @addr: memory base address
23568c2ecf20Sopenharmony_ci *
23578c2ecf20Sopenharmony_ci * Free the virtually contiguous memory area starting at @addr,
23588c2ecf20Sopenharmony_ci * which was created from the page array passed to vmap().
23598c2ecf20Sopenharmony_ci *
23608c2ecf20Sopenharmony_ci * Must not be called in interrupt context.
23618c2ecf20Sopenharmony_ci */
23628c2ecf20Sopenharmony_civoid vunmap(const void *addr)
23638c2ecf20Sopenharmony_ci{
23648c2ecf20Sopenharmony_ci	BUG_ON(in_interrupt());
23658c2ecf20Sopenharmony_ci	might_sleep();
23668c2ecf20Sopenharmony_ci	if (addr)
23678c2ecf20Sopenharmony_ci		__vunmap(addr, 0);
23688c2ecf20Sopenharmony_ci}
23698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vunmap);
23708c2ecf20Sopenharmony_ci
23718c2ecf20Sopenharmony_ci/**
23728c2ecf20Sopenharmony_ci * vmap - map an array of pages into virtually contiguous space
23738c2ecf20Sopenharmony_ci * @pages: array of page pointers
23748c2ecf20Sopenharmony_ci * @count: number of pages to map
23758c2ecf20Sopenharmony_ci * @flags: vm_area->flags
23768c2ecf20Sopenharmony_ci * @prot: page protection for the mapping
23778c2ecf20Sopenharmony_ci *
23788c2ecf20Sopenharmony_ci * Maps @count pages from @pages into contiguous kernel virtual space.
23798c2ecf20Sopenharmony_ci * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
23808c2ecf20Sopenharmony_ci * (which must be kmalloc or vmalloc memory) and one reference per page in it
23818c2ecf20Sopenharmony_ci * are transferred from the caller to vmap(), and will be freed / dropped when
23828c2ecf20Sopenharmony_ci * vfree() is called on the return value.
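 *
 * A hedged usage sketch (the helpers below are invented for
 * illustration): without %VM_MAP_PUT_PAGES the caller keeps both the
 * array and the page references, so vunmap() only drops the mapping.
 *
 *	static void *map_example(struct page **pages, unsigned int n)
 *	{
 *		return vmap(pages, n, VM_MAP, PAGE_KERNEL);
 *	}
 *
 *	static void unmap_example(void *addr)
 *	{
 *		vunmap(addr);	// the pages themselves stay allocated
 *	}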
23838c2ecf20Sopenharmony_ci * 23848c2ecf20Sopenharmony_ci * Return: the address of the area or %NULL on failure 23858c2ecf20Sopenharmony_ci */ 23868c2ecf20Sopenharmony_civoid *vmap(struct page **pages, unsigned int count, 23878c2ecf20Sopenharmony_ci unsigned long flags, pgprot_t prot) 23888c2ecf20Sopenharmony_ci{ 23898c2ecf20Sopenharmony_ci struct vm_struct *area; 23908c2ecf20Sopenharmony_ci unsigned long size; /* In bytes */ 23918c2ecf20Sopenharmony_ci 23928c2ecf20Sopenharmony_ci might_sleep(); 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci if (count > totalram_pages()) 23958c2ecf20Sopenharmony_ci return NULL; 23968c2ecf20Sopenharmony_ci 23978c2ecf20Sopenharmony_ci size = (unsigned long)count << PAGE_SHIFT; 23988c2ecf20Sopenharmony_ci area = get_vm_area_caller(size, flags, __builtin_return_address(0)); 23998c2ecf20Sopenharmony_ci if (!area) 24008c2ecf20Sopenharmony_ci return NULL; 24018c2ecf20Sopenharmony_ci 24028c2ecf20Sopenharmony_ci if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot), 24038c2ecf20Sopenharmony_ci pages) < 0) { 24048c2ecf20Sopenharmony_ci vunmap(area->addr); 24058c2ecf20Sopenharmony_ci return NULL; 24068c2ecf20Sopenharmony_ci } 24078c2ecf20Sopenharmony_ci 24088c2ecf20Sopenharmony_ci if (flags & VM_MAP_PUT_PAGES) { 24098c2ecf20Sopenharmony_ci area->pages = pages; 24108c2ecf20Sopenharmony_ci area->nr_pages = count; 24118c2ecf20Sopenharmony_ci } 24128c2ecf20Sopenharmony_ci return area->addr; 24138c2ecf20Sopenharmony_ci} 24148c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmap); 24158c2ecf20Sopenharmony_ci 24168c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_PFN 24178c2ecf20Sopenharmony_cistruct vmap_pfn_data { 24188c2ecf20Sopenharmony_ci unsigned long *pfns; 24198c2ecf20Sopenharmony_ci pgprot_t prot; 24208c2ecf20Sopenharmony_ci unsigned int idx; 24218c2ecf20Sopenharmony_ci}; 24228c2ecf20Sopenharmony_ci 24238c2ecf20Sopenharmony_cistatic int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) 24248c2ecf20Sopenharmony_ci{ 24258c2ecf20Sopenharmony_ci struct vmap_pfn_data *data = private; 24268c2ecf20Sopenharmony_ci 24278c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx]))) 24288c2ecf20Sopenharmony_ci return -EINVAL; 24298c2ecf20Sopenharmony_ci *pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot)); 24308c2ecf20Sopenharmony_ci return 0; 24318c2ecf20Sopenharmony_ci} 24328c2ecf20Sopenharmony_ci 24338c2ecf20Sopenharmony_ci/** 24348c2ecf20Sopenharmony_ci * vmap_pfn - map an array of PFNs into virtually contiguous space 24358c2ecf20Sopenharmony_ci * @pfns: array of PFNs 24368c2ecf20Sopenharmony_ci * @count: number of pages to map 24378c2ecf20Sopenharmony_ci * @prot: page protection for the mapping 24388c2ecf20Sopenharmony_ci * 24398c2ecf20Sopenharmony_ci * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns 24408c2ecf20Sopenharmony_ci * the start address of the mapping. 
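 *
 * Illustrative sketch (editor's example; the PFN source is
 * hypothetical): vmap_pfn() is for PFNs that have no struct page,
 * e.g. device memory a driver discovered, so pfn_valid() must be
 * false for every entry.
 *
 *	static void *map_device_pfns(unsigned long *pfns, unsigned int n)
 *	{
 *		return vmap_pfn(pfns, n, pgprot_noncached(PAGE_KERNEL));
 *	}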
24418c2ecf20Sopenharmony_ci */ 24428c2ecf20Sopenharmony_civoid *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot) 24438c2ecf20Sopenharmony_ci{ 24448c2ecf20Sopenharmony_ci struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) }; 24458c2ecf20Sopenharmony_ci struct vm_struct *area; 24468c2ecf20Sopenharmony_ci 24478c2ecf20Sopenharmony_ci area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP, 24488c2ecf20Sopenharmony_ci __builtin_return_address(0)); 24498c2ecf20Sopenharmony_ci if (!area) 24508c2ecf20Sopenharmony_ci return NULL; 24518c2ecf20Sopenharmony_ci if (apply_to_page_range(&init_mm, (unsigned long)area->addr, 24528c2ecf20Sopenharmony_ci count * PAGE_SIZE, vmap_pfn_apply, &data)) { 24538c2ecf20Sopenharmony_ci free_vm_area(area); 24548c2ecf20Sopenharmony_ci return NULL; 24558c2ecf20Sopenharmony_ci } 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci flush_cache_vmap((unsigned long)area->addr, 24588c2ecf20Sopenharmony_ci (unsigned long)area->addr + count * PAGE_SIZE); 24598c2ecf20Sopenharmony_ci 24608c2ecf20Sopenharmony_ci return area->addr; 24618c2ecf20Sopenharmony_ci} 24628c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vmap_pfn); 24638c2ecf20Sopenharmony_ci#endif /* CONFIG_VMAP_PFN */ 24648c2ecf20Sopenharmony_ci 24658c2ecf20Sopenharmony_cistatic void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, 24668c2ecf20Sopenharmony_ci pgprot_t prot, int node) 24678c2ecf20Sopenharmony_ci{ 24688c2ecf20Sopenharmony_ci const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; 24698c2ecf20Sopenharmony_ci unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; 24708c2ecf20Sopenharmony_ci unsigned long array_size; 24718c2ecf20Sopenharmony_ci unsigned int i; 24728c2ecf20Sopenharmony_ci struct page **pages; 24738c2ecf20Sopenharmony_ci 24748c2ecf20Sopenharmony_ci array_size = (unsigned long)nr_pages * sizeof(struct page *); 24758c2ecf20Sopenharmony_ci gfp_mask |= __GFP_NOWARN; 24768c2ecf20Sopenharmony_ci if (!(gfp_mask & (GFP_DMA | GFP_DMA32))) 24778c2ecf20Sopenharmony_ci gfp_mask |= __GFP_HIGHMEM; 24788c2ecf20Sopenharmony_ci 24798c2ecf20Sopenharmony_ci /* Please note that the recursion is strictly bounded. 
*/ 24808c2ecf20Sopenharmony_ci if (array_size > PAGE_SIZE) { 24818c2ecf20Sopenharmony_ci pages = __vmalloc_node(array_size, 1, nested_gfp, node, 24828c2ecf20Sopenharmony_ci area->caller); 24838c2ecf20Sopenharmony_ci } else { 24848c2ecf20Sopenharmony_ci pages = kmalloc_node(array_size, nested_gfp, node); 24858c2ecf20Sopenharmony_ci } 24868c2ecf20Sopenharmony_ci 24878c2ecf20Sopenharmony_ci if (!pages) { 24888c2ecf20Sopenharmony_ci remove_vm_area(area->addr); 24898c2ecf20Sopenharmony_ci kfree(area); 24908c2ecf20Sopenharmony_ci return NULL; 24918c2ecf20Sopenharmony_ci } 24928c2ecf20Sopenharmony_ci 24938c2ecf20Sopenharmony_ci area->pages = pages; 24948c2ecf20Sopenharmony_ci area->nr_pages = nr_pages; 24958c2ecf20Sopenharmony_ci 24968c2ecf20Sopenharmony_ci for (i = 0; i < area->nr_pages; i++) { 24978c2ecf20Sopenharmony_ci struct page *page; 24988c2ecf20Sopenharmony_ci 24998c2ecf20Sopenharmony_ci if (node == NUMA_NO_NODE) 25008c2ecf20Sopenharmony_ci page = alloc_page(gfp_mask); 25018c2ecf20Sopenharmony_ci else 25028c2ecf20Sopenharmony_ci page = alloc_pages_node(node, gfp_mask, 0); 25038c2ecf20Sopenharmony_ci 25048c2ecf20Sopenharmony_ci if (unlikely(!page)) { 25058c2ecf20Sopenharmony_ci /* Successfully allocated i pages, free them in __vfree() */ 25068c2ecf20Sopenharmony_ci area->nr_pages = i; 25078c2ecf20Sopenharmony_ci atomic_long_add(area->nr_pages, &nr_vmalloc_pages); 25088c2ecf20Sopenharmony_ci goto fail; 25098c2ecf20Sopenharmony_ci } 25108c2ecf20Sopenharmony_ci area->pages[i] = page; 25118c2ecf20Sopenharmony_ci if (gfpflags_allow_blocking(gfp_mask)) 25128c2ecf20Sopenharmony_ci cond_resched(); 25138c2ecf20Sopenharmony_ci } 25148c2ecf20Sopenharmony_ci atomic_long_add(area->nr_pages, &nr_vmalloc_pages); 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area), 25178c2ecf20Sopenharmony_ci prot, pages) < 0) 25188c2ecf20Sopenharmony_ci goto fail; 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ci return area->addr; 25218c2ecf20Sopenharmony_ci 25228c2ecf20Sopenharmony_cifail: 25238c2ecf20Sopenharmony_ci warn_alloc(gfp_mask, NULL, 25248c2ecf20Sopenharmony_ci "vmalloc: allocation failure, allocated %ld of %ld bytes", 25258c2ecf20Sopenharmony_ci (area->nr_pages*PAGE_SIZE), area->size); 25268c2ecf20Sopenharmony_ci __vfree(area->addr); 25278c2ecf20Sopenharmony_ci return NULL; 25288c2ecf20Sopenharmony_ci} 25298c2ecf20Sopenharmony_ci 25308c2ecf20Sopenharmony_ci/** 25318c2ecf20Sopenharmony_ci * __vmalloc_node_range - allocate virtually contiguous memory 25328c2ecf20Sopenharmony_ci * @size: allocation size 25338c2ecf20Sopenharmony_ci * @align: desired alignment 25348c2ecf20Sopenharmony_ci * @start: vm area range start 25358c2ecf20Sopenharmony_ci * @end: vm area range end 25368c2ecf20Sopenharmony_ci * @gfp_mask: flags for the page level allocator 25378c2ecf20Sopenharmony_ci * @prot: protection mask for the allocated pages 25388c2ecf20Sopenharmony_ci * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) 25398c2ecf20Sopenharmony_ci * @node: node to use for allocation or NUMA_NO_NODE 25408c2ecf20Sopenharmony_ci * @caller: caller's return address 25418c2ecf20Sopenharmony_ci * 25428c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level 25438c2ecf20Sopenharmony_ci * allocator with @gfp_mask flags. Map them into contiguous 25448c2ecf20Sopenharmony_ci * kernel virtual space, using a pagetable protection of @prot. 
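 *
 * A usage sketch (hedged, editor's illustration; PAGE_KERNEL_EXEC is
 * only available on architectures that define it): this is the kind of
 * call a module loader makes to place an executable allocation in a
 * dedicated VA window.
 *
 *	static void *alloc_exec_example(unsigned long size)
 *	{
 *		return __vmalloc_node_range(size, 1, VMALLOC_START,
 *					VMALLOC_END, GFP_KERNEL,
 *					PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 *					__builtin_return_address(0));
 *	}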
25458c2ecf20Sopenharmony_ci *
25468c2ecf20Sopenharmony_ci * Return: the address of the area or %NULL on failure
25478c2ecf20Sopenharmony_ci */
25488c2ecf20Sopenharmony_civoid *__vmalloc_node_range(unsigned long size, unsigned long align,
25498c2ecf20Sopenharmony_ci			unsigned long start, unsigned long end, gfp_t gfp_mask,
25508c2ecf20Sopenharmony_ci			pgprot_t prot, unsigned long vm_flags, int node,
25518c2ecf20Sopenharmony_ci			const void *caller)
25528c2ecf20Sopenharmony_ci{
25538c2ecf20Sopenharmony_ci	struct vm_struct *area;
25548c2ecf20Sopenharmony_ci	void *addr;
25558c2ecf20Sopenharmony_ci	unsigned long real_size = size;
25568c2ecf20Sopenharmony_ci
25578c2ecf20Sopenharmony_ci	size = PAGE_ALIGN(size);
25588c2ecf20Sopenharmony_ci	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
25598c2ecf20Sopenharmony_ci		goto fail;
25608c2ecf20Sopenharmony_ci
25618c2ecf20Sopenharmony_ci	area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
25628c2ecf20Sopenharmony_ci				vm_flags, start, end, node, gfp_mask, caller);
25638c2ecf20Sopenharmony_ci	if (!area)
25648c2ecf20Sopenharmony_ci		goto fail;
25658c2ecf20Sopenharmony_ci
25668c2ecf20Sopenharmony_ci	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
25678c2ecf20Sopenharmony_ci	if (!addr)
25688c2ecf20Sopenharmony_ci		return NULL;
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci	/*
25718c2ecf20Sopenharmony_ci	 * In this function, the newly allocated vm_struct has the
25728c2ecf20Sopenharmony_ci	 * VM_UNINITIALIZED flag, meaning it is not fully initialized.
25738c2ecf20Sopenharmony_ci	 * Now that it is fully initialized, remove the flag here.
25748c2ecf20Sopenharmony_ci	 */
25758c2ecf20Sopenharmony_ci	clear_vm_uninitialized_flag(area);
25768c2ecf20Sopenharmony_ci
25778c2ecf20Sopenharmony_ci	kmemleak_vmalloc(area, size, gfp_mask);
25788c2ecf20Sopenharmony_ci
25798c2ecf20Sopenharmony_ci	return addr;
25808c2ecf20Sopenharmony_ci
25818c2ecf20Sopenharmony_cifail:
25828c2ecf20Sopenharmony_ci	warn_alloc(gfp_mask, NULL,
25838c2ecf20Sopenharmony_ci			  "vmalloc: allocation failure: %lu bytes", real_size);
25848c2ecf20Sopenharmony_ci	return NULL;
25858c2ecf20Sopenharmony_ci}
25868c2ecf20Sopenharmony_ci
25878c2ecf20Sopenharmony_ci/**
25888c2ecf20Sopenharmony_ci * __vmalloc_node - allocate virtually contiguous memory
25898c2ecf20Sopenharmony_ci * @size: allocation size
25908c2ecf20Sopenharmony_ci * @align: desired alignment
25918c2ecf20Sopenharmony_ci * @gfp_mask: flags for the page level allocator
25928c2ecf20Sopenharmony_ci * @node: node to use for allocation or NUMA_NO_NODE
25938c2ecf20Sopenharmony_ci * @caller: caller's return address
25948c2ecf20Sopenharmony_ci *
25958c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level allocator with
25968c2ecf20Sopenharmony_ci * @gfp_mask flags. Map them into contiguous kernel virtual space.
25978c2ecf20Sopenharmony_ci *
25988c2ecf20Sopenharmony_ci * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
25998c2ecf20Sopenharmony_ci * and __GFP_NOFAIL - are not supported.
26008c2ecf20Sopenharmony_ci *
26018c2ecf20Sopenharmony_ci * Any use of gfp flags outside of GFP_KERNEL should be discussed
26028c2ecf20Sopenharmony_ci * with the mm people.
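 *
 * For illustration only (an editor's sketch): a plain GFP_KERNEL
 * allocation on a chosen node, which is essentially what the
 * vmalloc_node() wrapper further down does.
 *
 *	static void *node_alloc_example(unsigned long size, int nid)
 *	{
 *		return __vmalloc_node(size, 1, GFP_KERNEL, nid,
 *				      __builtin_return_address(0));
 *	}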
26038c2ecf20Sopenharmony_ci *
26048c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error
26058c2ecf20Sopenharmony_ci */
26068c2ecf20Sopenharmony_civoid *__vmalloc_node(unsigned long size, unsigned long align,
26078c2ecf20Sopenharmony_ci			    gfp_t gfp_mask, int node, const void *caller)
26088c2ecf20Sopenharmony_ci{
26098c2ecf20Sopenharmony_ci	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
26108c2ecf20Sopenharmony_ci				gfp_mask, PAGE_KERNEL, 0, node, caller);
26118c2ecf20Sopenharmony_ci}
26128c2ecf20Sopenharmony_ci/*
26138c2ecf20Sopenharmony_ci * This is only for performance analysis of vmalloc and stress purposes.
26148c2ecf20Sopenharmony_ci * It is required by the vmalloc test module, therefore do not use it
26158c2ecf20Sopenharmony_ci * anywhere else.
26168c2ecf20Sopenharmony_ci */
26178c2ecf20Sopenharmony_ci#ifdef CONFIG_TEST_VMALLOC_MODULE
26188c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__vmalloc_node);
26198c2ecf20Sopenharmony_ci#endif
26208c2ecf20Sopenharmony_ci
26218c2ecf20Sopenharmony_civoid *__vmalloc(unsigned long size, gfp_t gfp_mask)
26228c2ecf20Sopenharmony_ci{
26238c2ecf20Sopenharmony_ci	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
26248c2ecf20Sopenharmony_ci				__builtin_return_address(0));
26258c2ecf20Sopenharmony_ci}
26268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__vmalloc);
26278c2ecf20Sopenharmony_ci
26288c2ecf20Sopenharmony_ci/**
26298c2ecf20Sopenharmony_ci * vmalloc - allocate virtually contiguous memory
26308c2ecf20Sopenharmony_ci * @size: allocation size
26318c2ecf20Sopenharmony_ci *
26328c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level
26338c2ecf20Sopenharmony_ci * allocator and map them into contiguous kernel virtual space.
26348c2ecf20Sopenharmony_ci *
26358c2ecf20Sopenharmony_ci * For tight control over page level allocator and protection flags
26368c2ecf20Sopenharmony_ci * use __vmalloc() instead.
26378c2ecf20Sopenharmony_ci *
26388c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error
26398c2ecf20Sopenharmony_ci */
26408c2ecf20Sopenharmony_civoid *vmalloc(unsigned long size)
26418c2ecf20Sopenharmony_ci{
26428c2ecf20Sopenharmony_ci	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
26438c2ecf20Sopenharmony_ci				__builtin_return_address(0));
26448c2ecf20Sopenharmony_ci}
26458c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmalloc);
26468c2ecf20Sopenharmony_ci
26478c2ecf20Sopenharmony_ci/**
26488c2ecf20Sopenharmony_ci * vzalloc - allocate virtually contiguous memory with zero fill
26498c2ecf20Sopenharmony_ci * @size: allocation size
26508c2ecf20Sopenharmony_ci *
26518c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level
26528c2ecf20Sopenharmony_ci * allocator and map them into contiguous kernel virtual space.
26538c2ecf20Sopenharmony_ci * The memory allocated is set to zero.
26548c2ecf20Sopenharmony_ci *
26558c2ecf20Sopenharmony_ci * For tight control over page level allocator and protection flags
26568c2ecf20Sopenharmony_ci * use __vmalloc() instead.
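 *
 * A minimal sketch (the struct and name are hypothetical): vzalloc()
 * suits tables too large for kmalloc(), and the overflow-checked
 * array_size() helper from <linux/overflow.h> sizes the allocation.
 *
 *	static struct entry *table_example(unsigned long nr)
 *	{
 *		struct entry *t = vzalloc(array_size(nr, sizeof(*t)));
 *
 *		return t;	// already zeroed, or NULL on failure
 *	}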
26578c2ecf20Sopenharmony_ci * 26588c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error 26598c2ecf20Sopenharmony_ci */ 26608c2ecf20Sopenharmony_civoid *vzalloc(unsigned long size) 26618c2ecf20Sopenharmony_ci{ 26628c2ecf20Sopenharmony_ci return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 26638c2ecf20Sopenharmony_ci __builtin_return_address(0)); 26648c2ecf20Sopenharmony_ci} 26658c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vzalloc); 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci/** 26688c2ecf20Sopenharmony_ci * vmalloc_user - allocate zeroed virtually contiguous memory for userspace 26698c2ecf20Sopenharmony_ci * @size: allocation size 26708c2ecf20Sopenharmony_ci * 26718c2ecf20Sopenharmony_ci * The resulting memory area is zeroed so it can be mapped to userspace 26728c2ecf20Sopenharmony_ci * without leaking data. 26738c2ecf20Sopenharmony_ci * 26748c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error 26758c2ecf20Sopenharmony_ci */ 26768c2ecf20Sopenharmony_civoid *vmalloc_user(unsigned long size) 26778c2ecf20Sopenharmony_ci{ 26788c2ecf20Sopenharmony_ci return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, 26798c2ecf20Sopenharmony_ci GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, 26808c2ecf20Sopenharmony_ci VM_USERMAP, NUMA_NO_NODE, 26818c2ecf20Sopenharmony_ci __builtin_return_address(0)); 26828c2ecf20Sopenharmony_ci} 26838c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmalloc_user); 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci/** 26868c2ecf20Sopenharmony_ci * vmalloc_node - allocate memory on a specific node 26878c2ecf20Sopenharmony_ci * @size: allocation size 26888c2ecf20Sopenharmony_ci * @node: numa node 26898c2ecf20Sopenharmony_ci * 26908c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level 26918c2ecf20Sopenharmony_ci * allocator and map them into contiguous kernel virtual space. 26928c2ecf20Sopenharmony_ci * 26938c2ecf20Sopenharmony_ci * For tight control over page level allocator and protection flags 26948c2ecf20Sopenharmony_ci * use __vmalloc() instead. 26958c2ecf20Sopenharmony_ci * 26968c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error 26978c2ecf20Sopenharmony_ci */ 26988c2ecf20Sopenharmony_civoid *vmalloc_node(unsigned long size, int node) 26998c2ecf20Sopenharmony_ci{ 27008c2ecf20Sopenharmony_ci return __vmalloc_node(size, 1, GFP_KERNEL, node, 27018c2ecf20Sopenharmony_ci __builtin_return_address(0)); 27028c2ecf20Sopenharmony_ci} 27038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmalloc_node); 27048c2ecf20Sopenharmony_ci 27058c2ecf20Sopenharmony_ci/** 27068c2ecf20Sopenharmony_ci * vzalloc_node - allocate memory on a specific node with zero fill 27078c2ecf20Sopenharmony_ci * @size: allocation size 27088c2ecf20Sopenharmony_ci * @node: numa node 27098c2ecf20Sopenharmony_ci * 27108c2ecf20Sopenharmony_ci * Allocate enough pages to cover @size from the page level 27118c2ecf20Sopenharmony_ci * allocator and map them into contiguous kernel virtual space. 27128c2ecf20Sopenharmony_ci * The memory allocated is set to zero. 
27138c2ecf20Sopenharmony_ci *
27148c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error
27158c2ecf20Sopenharmony_ci */
27168c2ecf20Sopenharmony_civoid *vzalloc_node(unsigned long size, int node)
27178c2ecf20Sopenharmony_ci{
27188c2ecf20Sopenharmony_ci	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
27198c2ecf20Sopenharmony_ci			__builtin_return_address(0));
27208c2ecf20Sopenharmony_ci}
27218c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vzalloc_node);
27228c2ecf20Sopenharmony_ci
27238c2ecf20Sopenharmony_ci#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
27248c2ecf20Sopenharmony_ci#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
27258c2ecf20Sopenharmony_ci#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
27268c2ecf20Sopenharmony_ci#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
27278c2ecf20Sopenharmony_ci#else
27288c2ecf20Sopenharmony_ci/*
27298c2ecf20Sopenharmony_ci * 64b systems should always have either DMA or DMA32 zones. For others
27308c2ecf20Sopenharmony_ci * GFP_DMA32 should do the right thing and use the normal zone.
27318c2ecf20Sopenharmony_ci */
27328c2ecf20Sopenharmony_ci#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
27338c2ecf20Sopenharmony_ci#endif
27348c2ecf20Sopenharmony_ci
27358c2ecf20Sopenharmony_ci/**
27368c2ecf20Sopenharmony_ci * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
27378c2ecf20Sopenharmony_ci * @size: allocation size
27388c2ecf20Sopenharmony_ci *
27398c2ecf20Sopenharmony_ci * Allocate enough 32bit PA addressable pages to cover @size from the
27408c2ecf20Sopenharmony_ci * page level allocator and map them into contiguous kernel virtual space.
27418c2ecf20Sopenharmony_ci *
27428c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error
27438c2ecf20Sopenharmony_ci */
27448c2ecf20Sopenharmony_civoid *vmalloc_32(unsigned long size)
27458c2ecf20Sopenharmony_ci{
27468c2ecf20Sopenharmony_ci	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
27478c2ecf20Sopenharmony_ci			__builtin_return_address(0));
27488c2ecf20Sopenharmony_ci}
27498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmalloc_32);
27508c2ecf20Sopenharmony_ci
27518c2ecf20Sopenharmony_ci/**
27528c2ecf20Sopenharmony_ci * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
27538c2ecf20Sopenharmony_ci * @size: allocation size
27548c2ecf20Sopenharmony_ci *
27558c2ecf20Sopenharmony_ci * The resulting memory area is 32bit addressable and zeroed so it can be
27568c2ecf20Sopenharmony_ci * mapped to userspace without leaking data.
27578c2ecf20Sopenharmony_ci *
27588c2ecf20Sopenharmony_ci * Return: pointer to the allocated memory or %NULL on error
27598c2ecf20Sopenharmony_ci */
27608c2ecf20Sopenharmony_civoid *vmalloc_32_user(unsigned long size)
27618c2ecf20Sopenharmony_ci{
27628c2ecf20Sopenharmony_ci	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
27638c2ecf20Sopenharmony_ci				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
27648c2ecf20Sopenharmony_ci				    VM_USERMAP, NUMA_NO_NODE,
27658c2ecf20Sopenharmony_ci				    __builtin_return_address(0));
27668c2ecf20Sopenharmony_ci}
27678c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vmalloc_32_user);
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_ci/*
27708c2ecf20Sopenharmony_ci * Small helper routine to copy contents to buf from addr.
27718c2ecf20Sopenharmony_ci * If the page is not present, fill zero.
27728c2ecf20Sopenharmony_ci */
27738c2ecf20Sopenharmony_ci
27748c2ecf20Sopenharmony_cistatic int aligned_vread(char *buf, char *addr, unsigned long count)
27758c2ecf20Sopenharmony_ci{
27768c2ecf20Sopenharmony_ci	struct page *p;
27778c2ecf20Sopenharmony_ci	int copied = 0;
27788c2ecf20Sopenharmony_ci
27798c2ecf20Sopenharmony_ci	while (count) {
27808c2ecf20Sopenharmony_ci		unsigned long offset, length;
27818c2ecf20Sopenharmony_ci
27828c2ecf20Sopenharmony_ci		offset = offset_in_page(addr);
27838c2ecf20Sopenharmony_ci		length = PAGE_SIZE - offset;
27848c2ecf20Sopenharmony_ci		if (length > count)
27858c2ecf20Sopenharmony_ci			length = count;
27868c2ecf20Sopenharmony_ci		p = vmalloc_to_page(addr);
27878c2ecf20Sopenharmony_ci		/*
27888c2ecf20Sopenharmony_ci		 * To do safe access to this _mapped_ area, we need
27898c2ecf20Sopenharmony_ci		 * lock. But adding lock here means that we need to add
27908c2ecf20Sopenharmony_ci		 * overhead of vmalloc()/vfree() calls for this _debug_
27918c2ecf20Sopenharmony_ci		 * interface, rarely used. Instead of that, we'll use
27928c2ecf20Sopenharmony_ci		 * kmap() and get small overhead in this access function.
27938c2ecf20Sopenharmony_ci		 */
27948c2ecf20Sopenharmony_ci		if (p) {
27958c2ecf20Sopenharmony_ci			/*
27968c2ecf20Sopenharmony_ci			 * we can expect USER0 is not used (see vread/vwrite's
27978c2ecf20Sopenharmony_ci			 * function description)
27988c2ecf20Sopenharmony_ci			 */
27998c2ecf20Sopenharmony_ci			void *map = kmap_atomic(p);
28008c2ecf20Sopenharmony_ci			memcpy(buf, map + offset, length);
28018c2ecf20Sopenharmony_ci			kunmap_atomic(map);
28028c2ecf20Sopenharmony_ci		} else
28038c2ecf20Sopenharmony_ci			memset(buf, 0, length);
28048c2ecf20Sopenharmony_ci
28058c2ecf20Sopenharmony_ci		addr += length;
28068c2ecf20Sopenharmony_ci		buf += length;
28078c2ecf20Sopenharmony_ci		copied += length;
28088c2ecf20Sopenharmony_ci		count -= length;
28098c2ecf20Sopenharmony_ci	}
28108c2ecf20Sopenharmony_ci	return copied;
28118c2ecf20Sopenharmony_ci}
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_cistatic int aligned_vwrite(char *buf, char *addr, unsigned long count)
28148c2ecf20Sopenharmony_ci{
28158c2ecf20Sopenharmony_ci	struct page *p;
28168c2ecf20Sopenharmony_ci	int copied = 0;
28178c2ecf20Sopenharmony_ci
28188c2ecf20Sopenharmony_ci	while (count) {
28198c2ecf20Sopenharmony_ci		unsigned long offset, length;
28208c2ecf20Sopenharmony_ci
28218c2ecf20Sopenharmony_ci		offset = offset_in_page(addr);
28228c2ecf20Sopenharmony_ci		length = PAGE_SIZE - offset;
28238c2ecf20Sopenharmony_ci		if (length > count)
28248c2ecf20Sopenharmony_ci			length = count;
28258c2ecf20Sopenharmony_ci		p = vmalloc_to_page(addr);
28268c2ecf20Sopenharmony_ci		/*
28278c2ecf20Sopenharmony_ci		 * To do safe access to this _mapped_ area, we need
28288c2ecf20Sopenharmony_ci		 * lock. But adding lock here means that we need to add
28298c2ecf20Sopenharmony_ci		 * overhead of vmalloc()/vfree() calls for this _debug_
28308c2ecf20Sopenharmony_ci		 * interface, rarely used. Instead of that, we'll use
28318c2ecf20Sopenharmony_ci		 * kmap() and get small overhead in this access function.
28328c2ecf20Sopenharmony_ci		 */
28338c2ecf20Sopenharmony_ci		if (p) {
28348c2ecf20Sopenharmony_ci			/*
28358c2ecf20Sopenharmony_ci			 * we can expect USER0 is not used (see vread/vwrite's
28368c2ecf20Sopenharmony_ci			 * function description)
28378c2ecf20Sopenharmony_ci			 */
28388c2ecf20Sopenharmony_ci			void *map = kmap_atomic(p);
28398c2ecf20Sopenharmony_ci			memcpy(map + offset, buf, length);
28408c2ecf20Sopenharmony_ci			kunmap_atomic(map);
28418c2ecf20Sopenharmony_ci		}
28428c2ecf20Sopenharmony_ci		addr += length;
28438c2ecf20Sopenharmony_ci		buf += length;
28448c2ecf20Sopenharmony_ci		copied += length;
28458c2ecf20Sopenharmony_ci		count -= length;
28468c2ecf20Sopenharmony_ci	}
28478c2ecf20Sopenharmony_ci	return copied;
28488c2ecf20Sopenharmony_ci}
28498c2ecf20Sopenharmony_ci
28508c2ecf20Sopenharmony_ci/**
28518c2ecf20Sopenharmony_ci * vread() - read vmalloc area in a safe way.
28528c2ecf20Sopenharmony_ci * @buf: buffer for reading data
28538c2ecf20Sopenharmony_ci * @addr: vm address.
28548c2ecf20Sopenharmony_ci * @count: number of bytes to be read.
28558c2ecf20Sopenharmony_ci *
28568c2ecf20Sopenharmony_ci * This function checks that addr is a valid vmalloc'ed area, and
28578c2ecf20Sopenharmony_ci * copies data from that area to a given buffer. If the given memory range
28588c2ecf20Sopenharmony_ci * of [addr...addr+count) includes some valid address, data is copied to
28598c2ecf20Sopenharmony_ci * the proper area of @buf. If there are memory holes, they'll be zero-filled.
28608c2ecf20Sopenharmony_ci * IOREMAP areas are treated as memory holes and no copy is done.
28618c2ecf20Sopenharmony_ci *
28628c2ecf20Sopenharmony_ci * If [addr...addr+count) doesn't include any intersection with a live
28638c2ecf20Sopenharmony_ci * vm_struct area, this function returns 0. @buf should be a kernel buffer.
28648c2ecf20Sopenharmony_ci *
28658c2ecf20Sopenharmony_ci * Note: In usual ops, vread() is never necessary because the caller
28668c2ecf20Sopenharmony_ci * should know the vmalloc() area is valid and can use memcpy().
28678c2ecf20Sopenharmony_ci * This is for routines which have to access the vmalloc area without
28688c2ecf20Sopenharmony_ci * any information, such as /dev/kmem.
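 *
 * Sketch of a debug-style caller (illustrative; names are invented):
 * unlike a raw memcpy(), vread() tolerates holes in the range and
 * zero-fills them instead of faulting.
 *
 *	static long dump_example(char *kbuf, void *vaddr, unsigned long len)
 *	{
 *		return vread(kbuf, (char *)vaddr, len);
 *	}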
28698c2ecf20Sopenharmony_ci *
28708c2ecf20Sopenharmony_ci * Return: number of bytes for which addr and buf should be increased
28718c2ecf20Sopenharmony_ci *	(same number as @count) or %0 if [addr...addr+count) doesn't
28728c2ecf20Sopenharmony_ci *	include any intersection with valid vmalloc area
28738c2ecf20Sopenharmony_ci */
28748c2ecf20Sopenharmony_cilong vread(char *buf, char *addr, unsigned long count)
28758c2ecf20Sopenharmony_ci{
28768c2ecf20Sopenharmony_ci	struct vmap_area *va;
28778c2ecf20Sopenharmony_ci	struct vm_struct *vm;
28788c2ecf20Sopenharmony_ci	char *vaddr, *buf_start = buf;
28798c2ecf20Sopenharmony_ci	unsigned long buflen = count;
28808c2ecf20Sopenharmony_ci	unsigned long n;
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_ci	/* Don't allow overflow */
28838c2ecf20Sopenharmony_ci	if ((unsigned long) addr + count < count)
28848c2ecf20Sopenharmony_ci		count = -(unsigned long) addr;
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_ci	spin_lock(&vmap_area_lock);
28878c2ecf20Sopenharmony_ci	list_for_each_entry(va, &vmap_area_list, list) {
28888c2ecf20Sopenharmony_ci		if (!count)
28898c2ecf20Sopenharmony_ci			break;
28908c2ecf20Sopenharmony_ci
28918c2ecf20Sopenharmony_ci		if (!va->vm)
28928c2ecf20Sopenharmony_ci			continue;
28938c2ecf20Sopenharmony_ci
28948c2ecf20Sopenharmony_ci		vm = va->vm;
28958c2ecf20Sopenharmony_ci		vaddr = (char *) vm->addr;
28968c2ecf20Sopenharmony_ci		if (addr >= vaddr + get_vm_area_size(vm))
28978c2ecf20Sopenharmony_ci			continue;
28988c2ecf20Sopenharmony_ci		while (addr < vaddr) {
28998c2ecf20Sopenharmony_ci			if (count == 0)
29008c2ecf20Sopenharmony_ci				goto finished;
29018c2ecf20Sopenharmony_ci			*buf = '\0';
29028c2ecf20Sopenharmony_ci			buf++;
29038c2ecf20Sopenharmony_ci			addr++;
29048c2ecf20Sopenharmony_ci			count--;
29058c2ecf20Sopenharmony_ci		}
29068c2ecf20Sopenharmony_ci		n = vaddr + get_vm_area_size(vm) - addr;
29078c2ecf20Sopenharmony_ci		if (n > count)
29088c2ecf20Sopenharmony_ci			n = count;
29098c2ecf20Sopenharmony_ci		if (!(vm->flags & VM_IOREMAP))
29108c2ecf20Sopenharmony_ci			aligned_vread(buf, addr, n);
29118c2ecf20Sopenharmony_ci		else /* IOREMAP area is treated as memory hole */
29128c2ecf20Sopenharmony_ci			memset(buf, 0, n);
29138c2ecf20Sopenharmony_ci		buf += n;
29148c2ecf20Sopenharmony_ci		addr += n;
29158c2ecf20Sopenharmony_ci		count -= n;
29168c2ecf20Sopenharmony_ci	}
29178c2ecf20Sopenharmony_cifinished:
29188c2ecf20Sopenharmony_ci	spin_unlock(&vmap_area_lock);
29198c2ecf20Sopenharmony_ci
29208c2ecf20Sopenharmony_ci	if (buf == buf_start)
29218c2ecf20Sopenharmony_ci		return 0;
29228c2ecf20Sopenharmony_ci	/* zero-fill memory holes */
29238c2ecf20Sopenharmony_ci	if (buf != buf_start + buflen)
29248c2ecf20Sopenharmony_ci		memset(buf, 0, buflen - (buf - buf_start));
29258c2ecf20Sopenharmony_ci
29268c2ecf20Sopenharmony_ci	return buflen;
29278c2ecf20Sopenharmony_ci}
29288c2ecf20Sopenharmony_ci
29298c2ecf20Sopenharmony_ci/**
29308c2ecf20Sopenharmony_ci * vwrite() - write vmalloc area in a safe way.
29318c2ecf20Sopenharmony_ci * @buf: buffer for source data
29328c2ecf20Sopenharmony_ci * @addr: vm address.
29338c2ecf20Sopenharmony_ci * @count: number of bytes to be written.
29348c2ecf20Sopenharmony_ci *
29358c2ecf20Sopenharmony_ci * This function checks that addr is a valid vmalloc'ed area, and
29368c2ecf20Sopenharmony_ci * copies data from a buffer to the given addr. If the specified range of
29378c2ecf20Sopenharmony_ci * [addr...addr+count) includes some valid address, data is copied from
29388c2ecf20Sopenharmony_ci * the proper area of @buf. If there are memory holes, nothing is copied to them.
29398c2ecf20Sopenharmony_ci * IOREMAP areas are treated as memory holes and no copy is done.
29408c2ecf20Sopenharmony_ci *
29418c2ecf20Sopenharmony_ci * If [addr...addr+count) doesn't include any intersection with a live
29428c2ecf20Sopenharmony_ci * vm_struct area, this function returns 0. @buf should be a kernel buffer.
29438c2ecf20Sopenharmony_ci *
29448c2ecf20Sopenharmony_ci * Note: In usual ops, vwrite() is never necessary because the caller
29458c2ecf20Sopenharmony_ci * should know the vmalloc() area is valid and can use memcpy().
29468c2ecf20Sopenharmony_ci * This is for routines which have to access the vmalloc area without
29478c2ecf20Sopenharmony_ci * any information, such as /dev/kmem.
29488c2ecf20Sopenharmony_ci *
29498c2ecf20Sopenharmony_ci * Return: number of bytes for which addr and buf should be
29508c2ecf20Sopenharmony_ci *	increased (same number as @count) or %0 if [addr...addr+count)
29518c2ecf20Sopenharmony_ci *	doesn't include any intersection with valid vmalloc area
29528c2ecf20Sopenharmony_ci */
29538c2ecf20Sopenharmony_cilong vwrite(char *buf, char *addr, unsigned long count)
29548c2ecf20Sopenharmony_ci{
29558c2ecf20Sopenharmony_ci	struct vmap_area *va;
29568c2ecf20Sopenharmony_ci	struct vm_struct *vm;
29578c2ecf20Sopenharmony_ci	char *vaddr;
29588c2ecf20Sopenharmony_ci	unsigned long n, buflen;
29598c2ecf20Sopenharmony_ci	int copied = 0;
29608c2ecf20Sopenharmony_ci
29618c2ecf20Sopenharmony_ci	/* Don't allow overflow */
29628c2ecf20Sopenharmony_ci	if ((unsigned long) addr + count < count)
29638c2ecf20Sopenharmony_ci		count = -(unsigned long) addr;
29648c2ecf20Sopenharmony_ci	buflen = count;
29658c2ecf20Sopenharmony_ci
29668c2ecf20Sopenharmony_ci	spin_lock(&vmap_area_lock);
29678c2ecf20Sopenharmony_ci	list_for_each_entry(va, &vmap_area_list, list) {
29688c2ecf20Sopenharmony_ci		if (!count)
29698c2ecf20Sopenharmony_ci			break;
29708c2ecf20Sopenharmony_ci
29718c2ecf20Sopenharmony_ci		if (!va->vm)
29728c2ecf20Sopenharmony_ci			continue;
29738c2ecf20Sopenharmony_ci
29748c2ecf20Sopenharmony_ci		vm = va->vm;
29758c2ecf20Sopenharmony_ci		vaddr = (char *) vm->addr;
29768c2ecf20Sopenharmony_ci		if (addr >= vaddr + get_vm_area_size(vm))
29778c2ecf20Sopenharmony_ci			continue;
29788c2ecf20Sopenharmony_ci		while (addr < vaddr) {
29798c2ecf20Sopenharmony_ci			if (count == 0)
29808c2ecf20Sopenharmony_ci				goto finished;
29818c2ecf20Sopenharmony_ci			buf++;
29828c2ecf20Sopenharmony_ci			addr++;
29838c2ecf20Sopenharmony_ci			count--;
29848c2ecf20Sopenharmony_ci		}
29858c2ecf20Sopenharmony_ci		n = vaddr + get_vm_area_size(vm) - addr;
29868c2ecf20Sopenharmony_ci		if (n > count)
29878c2ecf20Sopenharmony_ci			n = count;
29888c2ecf20Sopenharmony_ci		if (!(vm->flags & VM_IOREMAP)) {
29898c2ecf20Sopenharmony_ci			aligned_vwrite(buf, addr, n);
29908c2ecf20Sopenharmony_ci			copied++;
29918c2ecf20Sopenharmony_ci		}
29928c2ecf20Sopenharmony_ci		buf += n;
29938c2ecf20Sopenharmony_ci		addr += n;
29948c2ecf20Sopenharmony_ci		count -= n;
29958c2ecf20Sopenharmony_ci	}
29968c2ecf20Sopenharmony_cifinished:
29978c2ecf20Sopenharmony_ci	spin_unlock(&vmap_area_lock);
29988c2ecf20Sopenharmony_ci	if (!copied)
29998c2ecf20Sopenharmony_ci		return 0;
30008c2ecf20Sopenharmony_ci	return buflen;
30018c2ecf20Sopenharmony_ci}
30028c2ecf20Sopenharmony_ci
30038c2ecf20Sopenharmony_ci/**
30048c2ecf20Sopenharmony_ci * remap_vmalloc_range_partial - map vmalloc pages to userspace
30058c2ecf20Sopenharmony_ci * @vma: vma to cover
30068c2ecf20Sopenharmony_ci * @uaddr: target user address to start at
30078c2ecf20Sopenharmony_ci * @kaddr: virtual address of vmalloc kernel memory
30088c2ecf20Sopenharmony_ci * @pgoff: offset from @kaddr to start at
30098c2ecf20Sopenharmony_ci * @size: size of map area
30108c2ecf20Sopenharmony_ci *
30118c2ecf20Sopenharmony_ci * Returns: 0 for success, -Exxx on failure
30128c2ecf20Sopenharmony_ci *
30138c2ecf20Sopenharmony_ci * This function checks that @kaddr is a valid vmalloc'ed area,
30148c2ecf20Sopenharmony_ci * and that it is big enough to cover the range starting at
30158c2ecf20Sopenharmony_ci * @uaddr in @vma. Will return failure if that criterion isn't
30168c2ecf20Sopenharmony_ci * met.
30178c2ecf20Sopenharmony_ci *
30188c2ecf20Sopenharmony_ci * Similar to remap_pfn_range() (see mm/memory.c)
30198c2ecf20Sopenharmony_ci */
30208c2ecf20Sopenharmony_ciint remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
30218c2ecf20Sopenharmony_ci				void *kaddr, unsigned long pgoff,
30228c2ecf20Sopenharmony_ci				unsigned long size)
30238c2ecf20Sopenharmony_ci{
30248c2ecf20Sopenharmony_ci	struct vm_struct *area;
30258c2ecf20Sopenharmony_ci	unsigned long off;
30268c2ecf20Sopenharmony_ci	unsigned long end_index;
30278c2ecf20Sopenharmony_ci
30288c2ecf20Sopenharmony_ci	if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
30298c2ecf20Sopenharmony_ci		return -EINVAL;
30308c2ecf20Sopenharmony_ci
30318c2ecf20Sopenharmony_ci	size = PAGE_ALIGN(size);
30328c2ecf20Sopenharmony_ci
30338c2ecf20Sopenharmony_ci	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
30348c2ecf20Sopenharmony_ci		return -EINVAL;
30358c2ecf20Sopenharmony_ci
30368c2ecf20Sopenharmony_ci	area = find_vm_area(kaddr);
30378c2ecf20Sopenharmony_ci	if (!area)
30388c2ecf20Sopenharmony_ci		return -EINVAL;
30398c2ecf20Sopenharmony_ci
30408c2ecf20Sopenharmony_ci	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
30418c2ecf20Sopenharmony_ci		return -EINVAL;
30428c2ecf20Sopenharmony_ci
30438c2ecf20Sopenharmony_ci	if (check_add_overflow(size, off, &end_index) ||
30448c2ecf20Sopenharmony_ci	    end_index > get_vm_area_size(area))
30458c2ecf20Sopenharmony_ci		return -EINVAL;
30468c2ecf20Sopenharmony_ci	kaddr += off;
30478c2ecf20Sopenharmony_ci
30488c2ecf20Sopenharmony_ci	do {
30498c2ecf20Sopenharmony_ci		struct page *page = vmalloc_to_page(kaddr);
30508c2ecf20Sopenharmony_ci		int ret;
30518c2ecf20Sopenharmony_ci
30528c2ecf20Sopenharmony_ci		ret = vm_insert_page(vma, uaddr, page);
30538c2ecf20Sopenharmony_ci		if (ret)
30548c2ecf20Sopenharmony_ci			return ret;
30558c2ecf20Sopenharmony_ci
30568c2ecf20Sopenharmony_ci		uaddr += PAGE_SIZE;
30578c2ecf20Sopenharmony_ci		kaddr += PAGE_SIZE;
30588c2ecf20Sopenharmony_ci		size -= PAGE_SIZE;
30598c2ecf20Sopenharmony_ci	} while (size > 0);
30608c2ecf20Sopenharmony_ci
30618c2ecf20Sopenharmony_ci	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
30628c2ecf20Sopenharmony_ci
30638c2ecf20Sopenharmony_ci	return 0;
30648c2ecf20Sopenharmony_ci}
30658c2ecf20Sopenharmony_ciEXPORT_SYMBOL(remap_vmalloc_range_partial);
30668c2ecf20Sopenharmony_ci
30678c2ecf20Sopenharmony_ci/**
30688c2ecf20Sopenharmony_ci * remap_vmalloc_range - map vmalloc pages to userspace
30698c2ecf20Sopenharmony_ci * @vma: vma to cover (map full range of vma)
30708c2ecf20Sopenharmony_ci * @addr: vmalloc memory
30718c2ecf20Sopenharmony_ci * @pgoff: number of pages into addr before first page to map
30728c2ecf20Sopenharmony_ci *
30738c2ecf20Sopenharmony_ci * Returns: 0 for success, -Exxx on failure
30748c2ecf20Sopenharmony_ci *
30758c2ecf20Sopenharmony_ci * This function checks that addr is a valid vmalloc'ed area, and
30768c2ecf20Sopenharmony_ci * that it is big enough to cover the vma. Will return failure if
30778c2ecf20Sopenharmony_ci * that criterion isn't met.
30788c2ecf20Sopenharmony_ci *
30798c2ecf20Sopenharmony_ci * Similar to remap_pfn_range() (see mm/memory.c)
30808c2ecf20Sopenharmony_ci */
30818c2ecf20Sopenharmony_ciint remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
30828c2ecf20Sopenharmony_ci			unsigned long pgoff)
30838c2ecf20Sopenharmony_ci{
30848c2ecf20Sopenharmony_ci	return remap_vmalloc_range_partial(vma, vma->vm_start,
30858c2ecf20Sopenharmony_ci					   addr, pgoff,
30868c2ecf20Sopenharmony_ci					   vma->vm_end - vma->vm_start);
30878c2ecf20Sopenharmony_ci}
30888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(remap_vmalloc_range);
30898c2ecf20Sopenharmony_ci
30908c2ecf20Sopenharmony_civoid free_vm_area(struct vm_struct *area)
30918c2ecf20Sopenharmony_ci{
30928c2ecf20Sopenharmony_ci	struct vm_struct *ret;
30938c2ecf20Sopenharmony_ci	ret = remove_vm_area(area->addr);
30948c2ecf20Sopenharmony_ci	BUG_ON(ret != area);
30958c2ecf20Sopenharmony_ci	kfree(area);
30968c2ecf20Sopenharmony_ci}
30978c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(free_vm_area);
30988c2ecf20Sopenharmony_ci
30998c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP
31008c2ecf20Sopenharmony_cistatic struct vmap_area *node_to_va(struct rb_node *n)
31018c2ecf20Sopenharmony_ci{
31028c2ecf20Sopenharmony_ci	return rb_entry_safe(n, struct vmap_area, rb_node);
31038c2ecf20Sopenharmony_ci}
31048c2ecf20Sopenharmony_ci
31058c2ecf20Sopenharmony_ci/**
31068c2ecf20Sopenharmony_ci * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
31078c2ecf20Sopenharmony_ci * @addr: target address
31088c2ecf20Sopenharmony_ci *
31098c2ecf20Sopenharmony_ci * Returns: vmap_area if it is found. If there is no such area
31108c2ecf20Sopenharmony_ci * the first highest (reverse order) vmap_area is returned
31118c2ecf20Sopenharmony_ci * i.e. va->va_start < addr && va->va_end < addr, or NULL
31128c2ecf20Sopenharmony_ci * if there are no areas before @addr.
31138c2ecf20Sopenharmony_ci */
31148c2ecf20Sopenharmony_cistatic struct vmap_area *
31158c2ecf20Sopenharmony_cipvm_find_va_enclose_addr(unsigned long addr)
31168c2ecf20Sopenharmony_ci{
31178c2ecf20Sopenharmony_ci	struct vmap_area *va, *tmp;
31188c2ecf20Sopenharmony_ci	struct rb_node *n;
31198c2ecf20Sopenharmony_ci
31208c2ecf20Sopenharmony_ci	n = free_vmap_area_root.rb_node;
31218c2ecf20Sopenharmony_ci	va = NULL;
31228c2ecf20Sopenharmony_ci
31238c2ecf20Sopenharmony_ci	while (n) {
31248c2ecf20Sopenharmony_ci		tmp = rb_entry(n, struct vmap_area, rb_node);
31258c2ecf20Sopenharmony_ci		if (tmp->va_start <= addr) {
31268c2ecf20Sopenharmony_ci			va = tmp;
31278c2ecf20Sopenharmony_ci			if (tmp->va_end >= addr)
31288c2ecf20Sopenharmony_ci				break;
31298c2ecf20Sopenharmony_ci
31308c2ecf20Sopenharmony_ci			n = n->rb_right;
31318c2ecf20Sopenharmony_ci		} else {
31328c2ecf20Sopenharmony_ci			n = n->rb_left;
31338c2ecf20Sopenharmony_ci		}
31348c2ecf20Sopenharmony_ci	}
31358c2ecf20Sopenharmony_ci
31368c2ecf20Sopenharmony_ci	return va;
31378c2ecf20Sopenharmony_ci}
31388c2ecf20Sopenharmony_ci
31398c2ecf20Sopenharmony_ci/**
31408c2ecf20Sopenharmony_ci * pvm_determine_end_from_reverse - find the highest aligned address
31418c2ecf20Sopenharmony_ci * of free block below VMALLOC_END
31428c2ecf20Sopenharmony_ci * @va:
31438c2ecf20Sopenharmony_ci *	in - the VA we start the search (reverse order);
31448c2ecf20Sopenharmony_ci *	out - the VA with the highest aligned end address.
31458c2ecf20Sopenharmony_ci *
31468c2ecf20Sopenharmony_ci * Returns: determined end address within vmap_area
31478c2ecf20Sopenharmony_ci */
31488c2ecf20Sopenharmony_cistatic unsigned long
31498c2ecf20Sopenharmony_cipvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
31508c2ecf20Sopenharmony_ci{
31518c2ecf20Sopenharmony_ci	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
31528c2ecf20Sopenharmony_ci	unsigned long addr;
31538c2ecf20Sopenharmony_ci
31548c2ecf20Sopenharmony_ci	if (likely(*va)) {
31558c2ecf20Sopenharmony_ci		list_for_each_entry_from_reverse((*va),
31568c2ecf20Sopenharmony_ci				&free_vmap_area_list, list) {
31578c2ecf20Sopenharmony_ci			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
31588c2ecf20Sopenharmony_ci			if ((*va)->va_start < addr)
31598c2ecf20Sopenharmony_ci				return addr;
31608c2ecf20Sopenharmony_ci		}
31618c2ecf20Sopenharmony_ci	}
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci	return 0;
31648c2ecf20Sopenharmony_ci}
31658c2ecf20Sopenharmony_ci
31668c2ecf20Sopenharmony_ci/**
31678c2ecf20Sopenharmony_ci * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
31688c2ecf20Sopenharmony_ci * @offsets: array containing offset of each area
31698c2ecf20Sopenharmony_ci * @sizes: array containing size of each area
31708c2ecf20Sopenharmony_ci * @nr_vms: the number of areas to allocate
31718c2ecf20Sopenharmony_ci * @align: alignment, all entries in @offsets and @sizes must be aligned to this
31728c2ecf20Sopenharmony_ci *
31738c2ecf20Sopenharmony_ci * Returns: kmalloc'd vm_struct pointer array pointing to allocated
31748c2ecf20Sopenharmony_ci *	    vm_structs on success, %NULL on failure
31758c2ecf20Sopenharmony_ci *
31768c2ecf20Sopenharmony_ci * Percpu allocator wants to use congruent vm areas so that it can
31778c2ecf20Sopenharmony_ci * maintain the offsets among percpu areas. This function allocates
31788c2ecf20Sopenharmony_ci * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to
31798c2ecf20Sopenharmony_ci * be scattered pretty far, distance between two areas easily going up
31808c2ecf20Sopenharmony_ci * to gigabytes. To avoid interacting with regular vmallocs, these
31818c2ecf20Sopenharmony_ci * areas are allocated from the top.
31828c2ecf20Sopenharmony_ci *
31838c2ecf20Sopenharmony_ci * Despite its complicated look, this allocator is rather simple. It
31848c2ecf20Sopenharmony_ci * does everything top-down and scans free blocks from the end looking
31858c2ecf20Sopenharmony_ci * for a matching base. While scanning, if any of the areas do not fit,
31868c2ecf20Sopenharmony_ci * the base address is pulled down to fit them. Scanning is repeated till
31878c2ecf20Sopenharmony_ci * all the areas fit and then all necessary data structures are inserted
31888c2ecf20Sopenharmony_ci * and the result is returned.
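 *
 * A rough usage sketch (editor's illustration with invented offsets
 * and sizes; the real caller is the percpu allocator): two congruent
 * areas whose relative offsets are preserved against a common base.
 *
 *	static void pcpu_areas_example(void)
 *	{
 *		const unsigned long offsets[] = { 0, SZ_4M };
 *		const size_t sizes[] = { SZ_1M, SZ_1M };
 *		struct vm_struct **vms;
 *
 *		vms = pcpu_get_vm_areas(offsets, sizes, 2, PAGE_SIZE);
 *		if (!vms)
 *			return;
 *		// ... use vms[0]->addr and vms[1]->addr ...
 *		pcpu_free_vm_areas(vms, 2);
 *	}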
31898c2ecf20Sopenharmony_ci */ 31908c2ecf20Sopenharmony_cistruct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, 31918c2ecf20Sopenharmony_ci const size_t *sizes, int nr_vms, 31928c2ecf20Sopenharmony_ci size_t align) 31938c2ecf20Sopenharmony_ci{ 31948c2ecf20Sopenharmony_ci const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); 31958c2ecf20Sopenharmony_ci const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); 31968c2ecf20Sopenharmony_ci struct vmap_area **vas, *va; 31978c2ecf20Sopenharmony_ci struct vm_struct **vms; 31988c2ecf20Sopenharmony_ci int area, area2, last_area, term_area; 31998c2ecf20Sopenharmony_ci unsigned long base, start, size, end, last_end, orig_start, orig_end; 32008c2ecf20Sopenharmony_ci bool purged = false; 32018c2ecf20Sopenharmony_ci enum fit_type type; 32028c2ecf20Sopenharmony_ci 32038c2ecf20Sopenharmony_ci /* verify parameters and allocate data structures */ 32048c2ecf20Sopenharmony_ci BUG_ON(offset_in_page(align) || !is_power_of_2(align)); 32058c2ecf20Sopenharmony_ci for (last_area = 0, area = 0; area < nr_vms; area++) { 32068c2ecf20Sopenharmony_ci start = offsets[area]; 32078c2ecf20Sopenharmony_ci end = start + sizes[area]; 32088c2ecf20Sopenharmony_ci 32098c2ecf20Sopenharmony_ci /* is everything aligned properly? */ 32108c2ecf20Sopenharmony_ci BUG_ON(!IS_ALIGNED(offsets[area], align)); 32118c2ecf20Sopenharmony_ci BUG_ON(!IS_ALIGNED(sizes[area], align)); 32128c2ecf20Sopenharmony_ci 32138c2ecf20Sopenharmony_ci /* detect the area with the highest address */ 32148c2ecf20Sopenharmony_ci if (start > offsets[last_area]) 32158c2ecf20Sopenharmony_ci last_area = area; 32168c2ecf20Sopenharmony_ci 32178c2ecf20Sopenharmony_ci for (area2 = area + 1; area2 < nr_vms; area2++) { 32188c2ecf20Sopenharmony_ci unsigned long start2 = offsets[area2]; 32198c2ecf20Sopenharmony_ci unsigned long end2 = start2 + sizes[area2]; 32208c2ecf20Sopenharmony_ci 32218c2ecf20Sopenharmony_ci BUG_ON(start2 < end && start < end2); 32228c2ecf20Sopenharmony_ci } 32238c2ecf20Sopenharmony_ci } 32248c2ecf20Sopenharmony_ci last_end = offsets[last_area] + sizes[last_area]; 32258c2ecf20Sopenharmony_ci 32268c2ecf20Sopenharmony_ci if (vmalloc_end - vmalloc_start < last_end) { 32278c2ecf20Sopenharmony_ci WARN_ON(true); 32288c2ecf20Sopenharmony_ci return NULL; 32298c2ecf20Sopenharmony_ci } 32308c2ecf20Sopenharmony_ci 32318c2ecf20Sopenharmony_ci vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL); 32328c2ecf20Sopenharmony_ci vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL); 32338c2ecf20Sopenharmony_ci if (!vas || !vms) 32348c2ecf20Sopenharmony_ci goto err_free2; 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ci for (area = 0; area < nr_vms; area++) { 32378c2ecf20Sopenharmony_ci vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL); 32388c2ecf20Sopenharmony_ci vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); 32398c2ecf20Sopenharmony_ci if (!vas[area] || !vms[area]) 32408c2ecf20Sopenharmony_ci goto err_free; 32418c2ecf20Sopenharmony_ci } 32428c2ecf20Sopenharmony_ciretry: 32438c2ecf20Sopenharmony_ci spin_lock(&free_vmap_area_lock); 32448c2ecf20Sopenharmony_ci 32458c2ecf20Sopenharmony_ci /* start scanning - we scan from the top, begin with the last area */ 32468c2ecf20Sopenharmony_ci area = term_area = last_area; 32478c2ecf20Sopenharmony_ci start = offsets[area]; 32488c2ecf20Sopenharmony_ci end = start + sizes[area]; 32498c2ecf20Sopenharmony_ci 32508c2ecf20Sopenharmony_ci va = pvm_find_va_enclose_addr(vmalloc_end); 32518c2ecf20Sopenharmony_ci base = 
pvm_determine_end_from_reverse(&va, align) - end; 32528c2ecf20Sopenharmony_ci 32538c2ecf20Sopenharmony_ci while (true) { 32548c2ecf20Sopenharmony_ci /* 32558c2ecf20Sopenharmony_ci * base might have underflowed, add last_end before 32568c2ecf20Sopenharmony_ci * comparing. 32578c2ecf20Sopenharmony_ci */ 32588c2ecf20Sopenharmony_ci if (base + last_end < vmalloc_start + last_end) 32598c2ecf20Sopenharmony_ci goto overflow; 32608c2ecf20Sopenharmony_ci 32618c2ecf20Sopenharmony_ci /* 32628c2ecf20Sopenharmony_ci * Fitting base has not been found. 32638c2ecf20Sopenharmony_ci */ 32648c2ecf20Sopenharmony_ci if (va == NULL) 32658c2ecf20Sopenharmony_ci goto overflow; 32668c2ecf20Sopenharmony_ci 32678c2ecf20Sopenharmony_ci /* 32688c2ecf20Sopenharmony_ci * If required width exceeds current VA block, move 32698c2ecf20Sopenharmony_ci * base downwards and then recheck. 32708c2ecf20Sopenharmony_ci */ 32718c2ecf20Sopenharmony_ci if (base + end > va->va_end) { 32728c2ecf20Sopenharmony_ci base = pvm_determine_end_from_reverse(&va, align) - end; 32738c2ecf20Sopenharmony_ci term_area = area; 32748c2ecf20Sopenharmony_ci continue; 32758c2ecf20Sopenharmony_ci } 32768c2ecf20Sopenharmony_ci 32778c2ecf20Sopenharmony_ci /* 32788c2ecf20Sopenharmony_ci * If this VA does not fit, move base downwards and recheck. 32798c2ecf20Sopenharmony_ci */ 32808c2ecf20Sopenharmony_ci if (base + start < va->va_start) { 32818c2ecf20Sopenharmony_ci va = node_to_va(rb_prev(&va->rb_node)); 32828c2ecf20Sopenharmony_ci base = pvm_determine_end_from_reverse(&va, align) - end; 32838c2ecf20Sopenharmony_ci term_area = area; 32848c2ecf20Sopenharmony_ci continue; 32858c2ecf20Sopenharmony_ci } 32868c2ecf20Sopenharmony_ci 32878c2ecf20Sopenharmony_ci /* 32888c2ecf20Sopenharmony_ci * This area fits, move on to the previous one. If 32898c2ecf20Sopenharmony_ci * the previous one is the terminal one, we're done. 32908c2ecf20Sopenharmony_ci */ 32918c2ecf20Sopenharmony_ci area = (area + nr_vms - 1) % nr_vms; 32928c2ecf20Sopenharmony_ci if (area == term_area) 32938c2ecf20Sopenharmony_ci break; 32948c2ecf20Sopenharmony_ci 32958c2ecf20Sopenharmony_ci start = offsets[area]; 32968c2ecf20Sopenharmony_ci end = start + sizes[area]; 32978c2ecf20Sopenharmony_ci va = pvm_find_va_enclose_addr(base + end); 32988c2ecf20Sopenharmony_ci } 32998c2ecf20Sopenharmony_ci 33008c2ecf20Sopenharmony_ci /* we've found a fitting base, insert all va's */ 33018c2ecf20Sopenharmony_ci for (area = 0; area < nr_vms; area++) { 33028c2ecf20Sopenharmony_ci int ret; 33038c2ecf20Sopenharmony_ci 33048c2ecf20Sopenharmony_ci start = base + offsets[area]; 33058c2ecf20Sopenharmony_ci size = sizes[area]; 33068c2ecf20Sopenharmony_ci 33078c2ecf20Sopenharmony_ci va = pvm_find_va_enclose_addr(start); 33088c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(va == NULL)) 33098c2ecf20Sopenharmony_ci /* It is a BUG(), but trigger recovery instead. */ 33108c2ecf20Sopenharmony_ci goto recovery; 33118c2ecf20Sopenharmony_ci 33128c2ecf20Sopenharmony_ci type = classify_va_fit_type(va, start, size); 33138c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(type == NOTHING_FIT)) 33148c2ecf20Sopenharmony_ci /* It is a BUG(), but trigger recovery instead. */ 33158c2ecf20Sopenharmony_ci goto recovery; 33168c2ecf20Sopenharmony_ci 33178c2ecf20Sopenharmony_ci ret = adjust_va_to_fit_type(va, start, size, type); 33188c2ecf20Sopenharmony_ci if (unlikely(ret)) 33198c2ecf20Sopenharmony_ci goto recovery; 33208c2ecf20Sopenharmony_ci 33218c2ecf20Sopenharmony_ci /* Allocated area. 
 */
33228c2ecf20Sopenharmony_ci		va = vas[area];
33238c2ecf20Sopenharmony_ci		va->va_start = start;
33248c2ecf20Sopenharmony_ci		va->va_end = start + size;
33258c2ecf20Sopenharmony_ci	}
33268c2ecf20Sopenharmony_ci
33278c2ecf20Sopenharmony_ci	spin_unlock(&free_vmap_area_lock);
33288c2ecf20Sopenharmony_ci
33298c2ecf20Sopenharmony_ci	/* populate the kasan shadow space */
33308c2ecf20Sopenharmony_ci	for (area = 0; area < nr_vms; area++) {
33318c2ecf20Sopenharmony_ci		if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
33328c2ecf20Sopenharmony_ci			goto err_free_shadow;
33338c2ecf20Sopenharmony_ci
33348c2ecf20Sopenharmony_ci		kasan_unpoison_vmalloc((void *)vas[area]->va_start,
33358c2ecf20Sopenharmony_ci				       sizes[area]);
33368c2ecf20Sopenharmony_ci	}
33378c2ecf20Sopenharmony_ci
33388c2ecf20Sopenharmony_ci	/* insert all vm's */
33398c2ecf20Sopenharmony_ci	spin_lock(&vmap_area_lock);
33408c2ecf20Sopenharmony_ci	for (area = 0; area < nr_vms; area++) {
33418c2ecf20Sopenharmony_ci		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
33428c2ecf20Sopenharmony_ci
33438c2ecf20Sopenharmony_ci		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
33448c2ecf20Sopenharmony_ci				 pcpu_get_vm_areas);
33458c2ecf20Sopenharmony_ci	}
33468c2ecf20Sopenharmony_ci	spin_unlock(&vmap_area_lock);
33478c2ecf20Sopenharmony_ci
33488c2ecf20Sopenharmony_ci	kfree(vas);
33498c2ecf20Sopenharmony_ci	return vms;
33508c2ecf20Sopenharmony_ci
33518c2ecf20Sopenharmony_cirecovery:
33528c2ecf20Sopenharmony_ci	/*
33538c2ecf20Sopenharmony_ci	 * Remove previously allocated areas. There is no
33548c2ecf20Sopenharmony_ci	 * need to remove these areas from the busy tree,
33558c2ecf20Sopenharmony_ci	 * because they are inserted only on the final step
33568c2ecf20Sopenharmony_ci	 * and only when pcpu_get_vm_areas() succeeds.
33578c2ecf20Sopenharmony_ci	 */
33588c2ecf20Sopenharmony_ci	while (area--) {
33598c2ecf20Sopenharmony_ci		orig_start = vas[area]->va_start;
33608c2ecf20Sopenharmony_ci		orig_end = vas[area]->va_end;
33618c2ecf20Sopenharmony_ci		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
33628c2ecf20Sopenharmony_ci					    &free_vmap_area_list);
33638c2ecf20Sopenharmony_ci		if (va)
33648c2ecf20Sopenharmony_ci			kasan_release_vmalloc(orig_start, orig_end,
33658c2ecf20Sopenharmony_ci				va->va_start, va->va_end);
33668c2ecf20Sopenharmony_ci		vas[area] = NULL;
33678c2ecf20Sopenharmony_ci	}
33688c2ecf20Sopenharmony_ci
33698c2ecf20Sopenharmony_cioverflow:
33708c2ecf20Sopenharmony_ci	spin_unlock(&free_vmap_area_lock);
33718c2ecf20Sopenharmony_ci	if (!purged) {
33728c2ecf20Sopenharmony_ci		purge_vmap_area_lazy();
33738c2ecf20Sopenharmony_ci		purged = true;
33748c2ecf20Sopenharmony_ci
33758c2ecf20Sopenharmony_ci		/* Before "retry", check if we recover.
overflow:
	spin_unlock(&free_vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = true;

		/* Before "retry", check whether we can recover. */
		for (area = 0; area < nr_vms; area++) {
			if (vas[area])
				continue;

			vas[area] = kmem_cache_zalloc(
				vmap_area_cachep, GFP_KERNEL);
			if (!vas[area])
				goto err_free;
		}

		goto retry;
	}

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas[area])
			kmem_cache_free(vmap_area_cachep, vas[area]);

		kfree(vms[area]);
	}
err_free2:
	kfree(vas);
	kfree(vms);
	return NULL;

err_free_shadow:
	spin_lock(&free_vmap_area_lock);
	/*
	 * We release all the vmalloc shadows, even the ones for regions
	 * that hadn't been successfully added. This relies on
	 * kasan_release_vmalloc being able to tolerate this case.
	 */
	for (area = 0; area < nr_vms; area++) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
					    &free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
				va->va_start, va->va_end);
		vas[area] = NULL;
		kfree(vms[area]);
	}
	spin_unlock(&free_vmap_area_lock);
	kfree(vas);
	kfree(vms);
	return NULL;
}
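/*
 * Minimal usage sketch, assuming a caller shaped like the percpu
 * first-chunk setup (values hypothetical, error handling elided):
 *
 *	const unsigned long offsets[] = { 0, 4 * PAGE_SIZE };
 *	const size_t sizes[] = { 2 * PAGE_SIZE, 2 * PAGE_SIZE };
 *	struct vm_struct **vms;
 *
 *	vms = pcpu_get_vm_areas(offsets, sizes, 2, PAGE_SIZE);
 *	if (!vms)
 *		return -ENOMEM;
 *	...
 *	pcpu_free_vm_areas(vms, 2);
 *
 * All areas share one congruent base, i.e. vms[i]->addr ==
 * base + offsets[i] for every i.
 */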
/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int i;

	for (i = 0; i < nr_vms; i++)
		free_vm_area(vms[i]);
	kfree(vms);
}
#endif	/* CONFIG_SMP */

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
	__acquires(&vmap_purge_lock)
	__acquires(&vmap_area_lock)
{
	mutex_lock(&vmap_purge_lock);
	spin_lock(&vmap_area_lock);

	return seq_list_start(&vmap_area_list, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &vmap_area_list, pos);
}

static void s_stop(struct seq_file *m, void *p)
	__releases(&vmap_area_lock)
	__releases(&vmap_purge_lock)
{
	spin_unlock(&vmap_area_lock);
	mutex_unlock(&vmap_purge_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (IS_ENABLED(CONFIG_NUMA)) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		if (v->flags & VM_UNINITIALIZED)
			return;
		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
		smp_rmb();

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}
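/*
 * Illustrative only: for a mapping whose pages are spread over two
 * nodes, show_numa_info() appends something like " N0=3 N1=1" to the
 * output line, one "N<node>=<count>" pair per node that backs at
 * least one page.
 */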
static void show_purge_info(struct seq_file *m)
{
	struct llist_node *head;
	struct vmap_area *va;

	head = READ_ONCE(vmap_purge_list.first);
	if (head == NULL)
		return;

	llist_for_each_entry(va, head, purge_list) {
		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);
	}
}

static int s_show(struct seq_file *m, void *p)
{
	struct vmap_area *va;
	struct vm_struct *v;

	va = list_entry(p, struct vmap_area, list);

	/*
	 * s_show can race with remove_vm_area(): va->vm is NULL while
	 * the vmap area is being torn down, or when the area was
	 * created by vm_map_ram() and never had a vm_struct.
	 */
	if (!va->vm) {
		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);

		return 0;
	}

	v = va->vm;

	seq_printf(m, "0x%pK-0x%pK %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller)
		seq_printf(m, " %pS", v->caller);

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%pa", &v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_puts(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_puts(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_puts(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_puts(m, " user");

	if (v->flags & VM_DMA_COHERENT)
		seq_puts(m, " dma-coherent");

	if (is_vmalloc_addr(v->pages))
		seq_puts(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');

	/*
	 * As a final step, dump "unpurged" areas. Note that the entire
	 * "/proc/vmallocinfo" output will not be address-sorted,
	 * because the purge list is not sorted.
	 */
	if (list_is_last(&va->list, &vmap_area_list))
		show_purge_info(m);

	return 0;
}
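/*
 * Note: the per-node counters used by show_numa_info() live in
 * m->private; proc_vmalloc_init() below sizes that scratch buffer as
 * nr_node_ids * sizeof(unsigned int) when CONFIG_NUMA is enabled, so
 * s_show() never has to allocate per call.
 */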
35658c2ecf20Sopenharmony_ci */ 35668c2ecf20Sopenharmony_ci if (list_is_last(&va->list, &vmap_area_list)) 35678c2ecf20Sopenharmony_ci show_purge_info(m); 35688c2ecf20Sopenharmony_ci 35698c2ecf20Sopenharmony_ci return 0; 35708c2ecf20Sopenharmony_ci} 35718c2ecf20Sopenharmony_ci 35728c2ecf20Sopenharmony_cistatic const struct seq_operations vmalloc_op = { 35738c2ecf20Sopenharmony_ci .start = s_start, 35748c2ecf20Sopenharmony_ci .next = s_next, 35758c2ecf20Sopenharmony_ci .stop = s_stop, 35768c2ecf20Sopenharmony_ci .show = s_show, 35778c2ecf20Sopenharmony_ci}; 35788c2ecf20Sopenharmony_ci 35798c2ecf20Sopenharmony_cistatic int __init proc_vmalloc_init(void) 35808c2ecf20Sopenharmony_ci{ 35818c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_NUMA)) 35828c2ecf20Sopenharmony_ci proc_create_seq_private("vmallocinfo", 0400, NULL, 35838c2ecf20Sopenharmony_ci &vmalloc_op, 35848c2ecf20Sopenharmony_ci nr_node_ids * sizeof(unsigned int), NULL); 35858c2ecf20Sopenharmony_ci else 35868c2ecf20Sopenharmony_ci proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op); 35878c2ecf20Sopenharmony_ci return 0; 35888c2ecf20Sopenharmony_ci} 35898c2ecf20Sopenharmony_cimodule_init(proc_vmalloc_init); 35908c2ecf20Sopenharmony_ci 35918c2ecf20Sopenharmony_ci#endif 3592