18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright 2007 Andi Kleen, SUSE Labs. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This contains most of the x86 vDSO kernel-side code. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci#include <linux/mm.h> 88c2ecf20Sopenharmony_ci#include <linux/err.h> 98c2ecf20Sopenharmony_ci#include <linux/sched.h> 108c2ecf20Sopenharmony_ci#include <linux/sched/task_stack.h> 118c2ecf20Sopenharmony_ci#include <linux/slab.h> 128c2ecf20Sopenharmony_ci#include <linux/init.h> 138c2ecf20Sopenharmony_ci#include <linux/random.h> 148c2ecf20Sopenharmony_ci#include <linux/elf.h> 158c2ecf20Sopenharmony_ci#include <linux/cpu.h> 168c2ecf20Sopenharmony_ci#include <linux/ptrace.h> 178c2ecf20Sopenharmony_ci#include <linux/time_namespace.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include <asm/pvclock.h> 208c2ecf20Sopenharmony_ci#include <asm/vgtod.h> 218c2ecf20Sopenharmony_ci#include <asm/proto.h> 228c2ecf20Sopenharmony_ci#include <asm/vdso.h> 238c2ecf20Sopenharmony_ci#include <asm/vvar.h> 248c2ecf20Sopenharmony_ci#include <asm/tlb.h> 258c2ecf20Sopenharmony_ci#include <asm/page.h> 268c2ecf20Sopenharmony_ci#include <asm/desc.h> 278c2ecf20Sopenharmony_ci#include <asm/cpufeature.h> 288c2ecf20Sopenharmony_ci#include <clocksource/hyperv_timer.h> 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#undef _ASM_X86_VVAR_H 318c2ecf20Sopenharmony_ci#define EMIT_VVAR(name, offset) \ 328c2ecf20Sopenharmony_ci const size_t name ## _offset = offset; 338c2ecf20Sopenharmony_ci#include <asm/vvar.h> 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistruct vdso_data *arch_get_vdso_data(void *vvar_page) 368c2ecf20Sopenharmony_ci{ 378c2ecf20Sopenharmony_ci return (struct vdso_data *)(vvar_page + _vdso_data_offset); 388c2ecf20Sopenharmony_ci} 398c2ecf20Sopenharmony_ci#undef EMIT_VVAR 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ciunsigned int vclocks_used __read_mostly; 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci#if defined(CONFIG_X86_64) 448c2ecf20Sopenharmony_ciunsigned int __read_mostly vdso64_enabled = 1; 458c2ecf20Sopenharmony_ci#endif 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_civoid __init init_vdso_image(const struct vdso_image *image) 488c2ecf20Sopenharmony_ci{ 498c2ecf20Sopenharmony_ci BUG_ON(image->size % PAGE_SIZE != 0); 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci apply_alternatives((struct alt_instr *)(image->data + image->alt), 528c2ecf20Sopenharmony_ci (struct alt_instr *)(image->data + image->alt + 538c2ecf20Sopenharmony_ci image->alt_len)); 548c2ecf20Sopenharmony_ci} 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_cistatic const struct vm_special_mapping vvar_mapping; 578c2ecf20Sopenharmony_cistruct linux_binprm; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistatic vm_fault_t vdso_fault(const struct vm_special_mapping *sm, 608c2ecf20Sopenharmony_ci struct vm_area_struct *vma, struct vm_fault *vmf) 618c2ecf20Sopenharmony_ci{ 628c2ecf20Sopenharmony_ci const struct vdso_image *image = vma->vm_mm->context.vdso_image; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size) 658c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT)); 688c2ecf20Sopenharmony_ci get_page(vmf->page); 698c2ecf20Sopenharmony_ci return 0; 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistatic void vdso_fix_landing(const struct vdso_image *image, 738c2ecf20Sopenharmony_ci struct vm_area_struct *new_vma) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 768c2ecf20Sopenharmony_ci if (in_ia32_syscall() && image == &vdso_image_32) { 778c2ecf20Sopenharmony_ci struct pt_regs *regs = current_pt_regs(); 788c2ecf20Sopenharmony_ci unsigned long vdso_land = image->sym_int80_landing_pad; 798c2ecf20Sopenharmony_ci unsigned long old_land_addr = vdso_land + 808c2ecf20Sopenharmony_ci (unsigned long)current->mm->context.vdso; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci /* Fixing userspace landing - look at do_fast_syscall_32 */ 838c2ecf20Sopenharmony_ci if (regs->ip == old_land_addr) 848c2ecf20Sopenharmony_ci regs->ip = new_vma->vm_start + vdso_land; 858c2ecf20Sopenharmony_ci } 868c2ecf20Sopenharmony_ci#endif 878c2ecf20Sopenharmony_ci} 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_cistatic int vdso_mremap(const struct vm_special_mapping *sm, 908c2ecf20Sopenharmony_ci struct vm_area_struct *new_vma) 918c2ecf20Sopenharmony_ci{ 928c2ecf20Sopenharmony_ci unsigned long new_size = new_vma->vm_end - new_vma->vm_start; 938c2ecf20Sopenharmony_ci const struct vdso_image *image = current->mm->context.vdso_image; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci if (image->size != new_size) 968c2ecf20Sopenharmony_ci return -EINVAL; 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci vdso_fix_landing(image, new_vma); 998c2ecf20Sopenharmony_ci current->mm->context.vdso = (void __user *)new_vma->vm_start; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return 0; 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_cistatic int vvar_mremap(const struct vm_special_mapping *sm, 1058c2ecf20Sopenharmony_ci struct vm_area_struct *new_vma) 1068c2ecf20Sopenharmony_ci{ 1078c2ecf20Sopenharmony_ci const struct vdso_image *image = new_vma->vm_mm->context.vdso_image; 1088c2ecf20Sopenharmony_ci unsigned long new_size = new_vma->vm_end - new_vma->vm_start; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci if (new_size != -image->sym_vvar_start) 1118c2ecf20Sopenharmony_ci return -EINVAL; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci return 0; 1148c2ecf20Sopenharmony_ci} 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci#ifdef CONFIG_TIME_NS 1178c2ecf20Sopenharmony_cistatic struct page *find_timens_vvar_page(struct vm_area_struct *vma) 1188c2ecf20Sopenharmony_ci{ 1198c2ecf20Sopenharmony_ci if (likely(vma->vm_mm == current->mm)) 1208c2ecf20Sopenharmony_ci return current->nsproxy->time_ns->vvar_page; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci /* 1238c2ecf20Sopenharmony_ci * VM_PFNMAP | VM_IO protect .fault() handler from being called 1248c2ecf20Sopenharmony_ci * through interfaces like /proc/$pid/mem or 1258c2ecf20Sopenharmony_ci * process_vm_{readv,writev}() as long as there's no .access() 1268c2ecf20Sopenharmony_ci * in special_mapping_vmops(). 1278c2ecf20Sopenharmony_ci * For more details check_vma_flags() and __access_remote_vm() 1288c2ecf20Sopenharmony_ci */ 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci WARN(1, "vvar_page accessed remotely"); 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci return NULL; 1338c2ecf20Sopenharmony_ci} 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci/* 1368c2ecf20Sopenharmony_ci * The vvar page layout depends on whether a task belongs to the root or 1378c2ecf20Sopenharmony_ci * non-root time namespace. Whenever a task changes its namespace, the VVAR 1388c2ecf20Sopenharmony_ci * page tables are cleared and then they will re-faulted with a 1398c2ecf20Sopenharmony_ci * corresponding layout. 1408c2ecf20Sopenharmony_ci * See also the comment near timens_setup_vdso_data() for details. 1418c2ecf20Sopenharmony_ci */ 1428c2ecf20Sopenharmony_ciint vdso_join_timens(struct task_struct *task, struct time_namespace *ns) 1438c2ecf20Sopenharmony_ci{ 1448c2ecf20Sopenharmony_ci struct mm_struct *mm = task->mm; 1458c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci mmap_read_lock(mm); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci for (vma = mm->mmap; vma; vma = vma->vm_next) { 1508c2ecf20Sopenharmony_ci unsigned long size = vma->vm_end - vma->vm_start; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci if (vma_is_special_mapping(vma, &vvar_mapping)) 1538c2ecf20Sopenharmony_ci zap_page_range(vma, vma->vm_start, size); 1548c2ecf20Sopenharmony_ci } 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 1578c2ecf20Sopenharmony_ci return 0; 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci#else 1608c2ecf20Sopenharmony_cistatic inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci return NULL; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci#endif 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_cistatic vm_fault_t vvar_fault(const struct vm_special_mapping *sm, 1678c2ecf20Sopenharmony_ci struct vm_area_struct *vma, struct vm_fault *vmf) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci const struct vdso_image *image = vma->vm_mm->context.vdso_image; 1708c2ecf20Sopenharmony_ci unsigned long pfn; 1718c2ecf20Sopenharmony_ci long sym_offset; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci if (!image) 1748c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + 1778c2ecf20Sopenharmony_ci image->sym_vvar_start; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci /* 1808c2ecf20Sopenharmony_ci * Sanity check: a symbol offset of zero means that the page 1818c2ecf20Sopenharmony_ci * does not exist for this vdso image, not that the page is at 1828c2ecf20Sopenharmony_ci * offset zero relative to the text mapping. This should be 1838c2ecf20Sopenharmony_ci * impossible here, because sym_offset should only be zero for 1848c2ecf20Sopenharmony_ci * the page past the end of the vvar mapping. 1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_ci if (sym_offset == 0) 1878c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci if (sym_offset == image->sym_vvar_page) { 1908c2ecf20Sopenharmony_ci struct page *timens_page = find_timens_vvar_page(vma); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci /* 1958c2ecf20Sopenharmony_ci * If a task belongs to a time namespace then a namespace 1968c2ecf20Sopenharmony_ci * specific VVAR is mapped with the sym_vvar_page offset and 1978c2ecf20Sopenharmony_ci * the real VVAR page is mapped with the sym_timens_page 1988c2ecf20Sopenharmony_ci * offset. 1998c2ecf20Sopenharmony_ci * See also the comment near timens_setup_vdso_data(). 2008c2ecf20Sopenharmony_ci */ 2018c2ecf20Sopenharmony_ci if (timens_page) { 2028c2ecf20Sopenharmony_ci unsigned long addr; 2038c2ecf20Sopenharmony_ci vm_fault_t err; 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci /* 2068c2ecf20Sopenharmony_ci * Optimization: inside time namespace pre-fault 2078c2ecf20Sopenharmony_ci * VVAR page too. As on timens page there are only 2088c2ecf20Sopenharmony_ci * offsets for clocks on VVAR, it'll be faulted 2098c2ecf20Sopenharmony_ci * shortly by VDSO code. 2108c2ecf20Sopenharmony_ci */ 2118c2ecf20Sopenharmony_ci addr = vmf->address + (image->sym_timens_page - sym_offset); 2128c2ecf20Sopenharmony_ci err = vmf_insert_pfn(vma, addr, pfn); 2138c2ecf20Sopenharmony_ci if (unlikely(err & VM_FAULT_ERROR)) 2148c2ecf20Sopenharmony_ci return err; 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci pfn = page_to_pfn(timens_page); 2178c2ecf20Sopenharmony_ci } 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci return vmf_insert_pfn(vma, vmf->address, pfn); 2208c2ecf20Sopenharmony_ci } else if (sym_offset == image->sym_pvclock_page) { 2218c2ecf20Sopenharmony_ci struct pvclock_vsyscall_time_info *pvti = 2228c2ecf20Sopenharmony_ci pvclock_get_pvti_cpu0_va(); 2238c2ecf20Sopenharmony_ci if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) { 2248c2ecf20Sopenharmony_ci return vmf_insert_pfn_prot(vma, vmf->address, 2258c2ecf20Sopenharmony_ci __pa(pvti) >> PAGE_SHIFT, 2268c2ecf20Sopenharmony_ci pgprot_decrypted(vma->vm_page_prot)); 2278c2ecf20Sopenharmony_ci } 2288c2ecf20Sopenharmony_ci } else if (sym_offset == image->sym_hvclock_page) { 2298c2ecf20Sopenharmony_ci struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) 2328c2ecf20Sopenharmony_ci return vmf_insert_pfn(vma, vmf->address, 2338c2ecf20Sopenharmony_ci virt_to_phys(tsc_pg) >> PAGE_SHIFT); 2348c2ecf20Sopenharmony_ci } else if (sym_offset == image->sym_timens_page) { 2358c2ecf20Sopenharmony_ci struct page *timens_page = find_timens_vvar_page(vma); 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci if (!timens_page) 2388c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; 2418c2ecf20Sopenharmony_ci return vmf_insert_pfn(vma, vmf->address, pfn); 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 2458c2ecf20Sopenharmony_ci} 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_cistatic const struct vm_special_mapping vdso_mapping = { 2488c2ecf20Sopenharmony_ci .name = "[vdso]", 2498c2ecf20Sopenharmony_ci .fault = vdso_fault, 2508c2ecf20Sopenharmony_ci .mremap = vdso_mremap, 2518c2ecf20Sopenharmony_ci}; 2528c2ecf20Sopenharmony_cistatic const struct vm_special_mapping vvar_mapping = { 2538c2ecf20Sopenharmony_ci .name = "[vvar]", 2548c2ecf20Sopenharmony_ci .fault = vvar_fault, 2558c2ecf20Sopenharmony_ci .mremap = vvar_mremap, 2568c2ecf20Sopenharmony_ci}; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci/* 2598c2ecf20Sopenharmony_ci * Add vdso and vvar mappings to current process. 2608c2ecf20Sopenharmony_ci * @image - blob to map 2618c2ecf20Sopenharmony_ci * @addr - request a specific address (zero to map at free addr) 2628c2ecf20Sopenharmony_ci */ 2638c2ecf20Sopenharmony_cistatic int map_vdso(const struct vdso_image *image, unsigned long addr) 2648c2ecf20Sopenharmony_ci{ 2658c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 2668c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 2678c2ecf20Sopenharmony_ci unsigned long text_start; 2688c2ecf20Sopenharmony_ci int ret = 0; 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(mm)) 2718c2ecf20Sopenharmony_ci return -EINTR; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci addr = get_unmapped_area(NULL, addr, 2748c2ecf20Sopenharmony_ci image->size - image->sym_vvar_start, 0, 0); 2758c2ecf20Sopenharmony_ci if (IS_ERR_VALUE(addr)) { 2768c2ecf20Sopenharmony_ci ret = addr; 2778c2ecf20Sopenharmony_ci goto up_fail; 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_ci text_start = addr - image->sym_vvar_start; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci /* 2838c2ecf20Sopenharmony_ci * MAYWRITE to allow gdb to COW and set breakpoints 2848c2ecf20Sopenharmony_ci */ 2858c2ecf20Sopenharmony_ci vma = _install_special_mapping(mm, 2868c2ecf20Sopenharmony_ci text_start, 2878c2ecf20Sopenharmony_ci image->size, 2888c2ecf20Sopenharmony_ci VM_READ|VM_EXEC| 2898c2ecf20Sopenharmony_ci VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 2908c2ecf20Sopenharmony_ci &vdso_mapping); 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci if (IS_ERR(vma)) { 2938c2ecf20Sopenharmony_ci ret = PTR_ERR(vma); 2948c2ecf20Sopenharmony_ci goto up_fail; 2958c2ecf20Sopenharmony_ci } 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci vma = _install_special_mapping(mm, 2988c2ecf20Sopenharmony_ci addr, 2998c2ecf20Sopenharmony_ci -image->sym_vvar_start, 3008c2ecf20Sopenharmony_ci VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| 3018c2ecf20Sopenharmony_ci VM_PFNMAP, 3028c2ecf20Sopenharmony_ci &vvar_mapping); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci if (IS_ERR(vma)) { 3058c2ecf20Sopenharmony_ci ret = PTR_ERR(vma); 3068c2ecf20Sopenharmony_ci do_munmap(mm, text_start, image->size, NULL); 3078c2ecf20Sopenharmony_ci } else { 3088c2ecf20Sopenharmony_ci current->mm->context.vdso = (void __user *)text_start; 3098c2ecf20Sopenharmony_ci current->mm->context.vdso_image = image; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ciup_fail: 3138c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 3148c2ecf20Sopenharmony_ci return ret; 3158c2ecf20Sopenharmony_ci} 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 3188c2ecf20Sopenharmony_ci/* 3198c2ecf20Sopenharmony_ci * Put the vdso above the (randomized) stack with another randomized 3208c2ecf20Sopenharmony_ci * offset. This way there is no hole in the middle of address space. 3218c2ecf20Sopenharmony_ci * To save memory make sure it is still in the same PTE as the stack 3228c2ecf20Sopenharmony_ci * top. This doesn't give that many random bits. 3238c2ecf20Sopenharmony_ci * 3248c2ecf20Sopenharmony_ci * Note that this algorithm is imperfect: the distribution of the vdso 3258c2ecf20Sopenharmony_ci * start address within a PMD is biased toward the end. 3268c2ecf20Sopenharmony_ci * 3278c2ecf20Sopenharmony_ci * Only used for the 64-bit and x32 vdsos. 3288c2ecf20Sopenharmony_ci */ 3298c2ecf20Sopenharmony_cistatic unsigned long vdso_addr(unsigned long start, unsigned len) 3308c2ecf20Sopenharmony_ci{ 3318c2ecf20Sopenharmony_ci unsigned long addr, end; 3328c2ecf20Sopenharmony_ci unsigned offset; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci /* 3358c2ecf20Sopenharmony_ci * Round up the start address. It can start out unaligned as a result 3368c2ecf20Sopenharmony_ci * of stack start randomization. 3378c2ecf20Sopenharmony_ci */ 3388c2ecf20Sopenharmony_ci start = PAGE_ALIGN(start); 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci /* Round the lowest possible end address up to a PMD boundary. */ 3418c2ecf20Sopenharmony_ci end = (start + len + PMD_SIZE - 1) & PMD_MASK; 3428c2ecf20Sopenharmony_ci if (end >= DEFAULT_MAP_WINDOW) 3438c2ecf20Sopenharmony_ci end = DEFAULT_MAP_WINDOW; 3448c2ecf20Sopenharmony_ci end -= len; 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_ci if (end > start) { 3478c2ecf20Sopenharmony_ci offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); 3488c2ecf20Sopenharmony_ci addr = start + (offset << PAGE_SHIFT); 3498c2ecf20Sopenharmony_ci } else { 3508c2ecf20Sopenharmony_ci addr = start; 3518c2ecf20Sopenharmony_ci } 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci /* 3548c2ecf20Sopenharmony_ci * Forcibly align the final address in case we have a hardware 3558c2ecf20Sopenharmony_ci * issue that requires alignment for performance reasons. 3568c2ecf20Sopenharmony_ci */ 3578c2ecf20Sopenharmony_ci addr = align_vdso_addr(addr); 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci return addr; 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_cistatic int map_vdso_randomized(const struct vdso_image *image) 3638c2ecf20Sopenharmony_ci{ 3648c2ecf20Sopenharmony_ci unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci return map_vdso(image, addr); 3678c2ecf20Sopenharmony_ci} 3688c2ecf20Sopenharmony_ci#endif 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ciint map_vdso_once(const struct vdso_image *image, unsigned long addr) 3718c2ecf20Sopenharmony_ci{ 3728c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 3738c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci mmap_write_lock(mm); 3768c2ecf20Sopenharmony_ci /* 3778c2ecf20Sopenharmony_ci * Check if we have already mapped vdso blob - fail to prevent 3788c2ecf20Sopenharmony_ci * abusing from userspace install_speciall_mapping, which may 3798c2ecf20Sopenharmony_ci * not do accounting and rlimit right. 3808c2ecf20Sopenharmony_ci * We could search vma near context.vdso, but it's a slowpath, 3818c2ecf20Sopenharmony_ci * so let's explicitly check all VMAs to be completely sure. 3828c2ecf20Sopenharmony_ci */ 3838c2ecf20Sopenharmony_ci for (vma = mm->mmap; vma; vma = vma->vm_next) { 3848c2ecf20Sopenharmony_ci if (vma_is_special_mapping(vma, &vdso_mapping) || 3858c2ecf20Sopenharmony_ci vma_is_special_mapping(vma, &vvar_mapping)) { 3868c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 3878c2ecf20Sopenharmony_ci return -EEXIST; 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci return map_vdso(image, addr); 3938c2ecf20Sopenharmony_ci} 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 3968c2ecf20Sopenharmony_cistatic int load_vdso32(void) 3978c2ecf20Sopenharmony_ci{ 3988c2ecf20Sopenharmony_ci if (vdso32_enabled != 1) /* Other values all mean "disabled" */ 3998c2ecf20Sopenharmony_ci return 0; 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci return map_vdso(&vdso_image_32, 0); 4028c2ecf20Sopenharmony_ci} 4038c2ecf20Sopenharmony_ci#endif 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 4068c2ecf20Sopenharmony_ciint arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 4078c2ecf20Sopenharmony_ci{ 4088c2ecf20Sopenharmony_ci if (!vdso64_enabled) 4098c2ecf20Sopenharmony_ci return 0; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci return map_vdso_randomized(&vdso_image_64); 4128c2ecf20Sopenharmony_ci} 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 4158c2ecf20Sopenharmony_ciint compat_arch_setup_additional_pages(struct linux_binprm *bprm, 4168c2ecf20Sopenharmony_ci int uses_interp) 4178c2ecf20Sopenharmony_ci{ 4188c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_X32_ABI 4198c2ecf20Sopenharmony_ci if (test_thread_flag(TIF_X32)) { 4208c2ecf20Sopenharmony_ci if (!vdso64_enabled) 4218c2ecf20Sopenharmony_ci return 0; 4228c2ecf20Sopenharmony_ci return map_vdso_randomized(&vdso_image_x32); 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci#endif 4258c2ecf20Sopenharmony_ci#ifdef CONFIG_IA32_EMULATION 4268c2ecf20Sopenharmony_ci return load_vdso32(); 4278c2ecf20Sopenharmony_ci#else 4288c2ecf20Sopenharmony_ci return 0; 4298c2ecf20Sopenharmony_ci#endif 4308c2ecf20Sopenharmony_ci} 4318c2ecf20Sopenharmony_ci#endif 4328c2ecf20Sopenharmony_ci#else 4338c2ecf20Sopenharmony_ciint arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 4348c2ecf20Sopenharmony_ci{ 4358c2ecf20Sopenharmony_ci return load_vdso32(); 4368c2ecf20Sopenharmony_ci} 4378c2ecf20Sopenharmony_ci#endif 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 4408c2ecf20Sopenharmony_cistatic __init int vdso_setup(char *s) 4418c2ecf20Sopenharmony_ci{ 4428c2ecf20Sopenharmony_ci vdso64_enabled = simple_strtoul(s, NULL, 0); 4438c2ecf20Sopenharmony_ci return 1; 4448c2ecf20Sopenharmony_ci} 4458c2ecf20Sopenharmony_ci__setup("vdso=", vdso_setup); 4468c2ecf20Sopenharmony_ci 4478c2ecf20Sopenharmony_cistatic int __init init_vdso(void) 4488c2ecf20Sopenharmony_ci{ 4498c2ecf20Sopenharmony_ci BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32); 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci init_vdso_image(&vdso_image_64); 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_X32_ABI 4548c2ecf20Sopenharmony_ci init_vdso_image(&vdso_image_x32); 4558c2ecf20Sopenharmony_ci#endif 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci return 0; 4588c2ecf20Sopenharmony_ci} 4598c2ecf20Sopenharmony_cisubsys_initcall(init_vdso); 4608c2ecf20Sopenharmony_ci#endif /* CONFIG_X86_64 */ 461