18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Flexible mmap layout support 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Based on code by Ingo Molnar and Andi Kleen, copyrighted 68c2ecf20Sopenharmony_ci * as follows: 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright 2003-2009 Red Hat Inc. 98c2ecf20Sopenharmony_ci * All Rights Reserved. 108c2ecf20Sopenharmony_ci * Copyright 2005 Andi Kleen, SUSE Labs. 118c2ecf20Sopenharmony_ci * Copyright 2007 Jiri Kosina, SUSE Labs. 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/personality.h> 158c2ecf20Sopenharmony_ci#include <linux/mm.h> 168c2ecf20Sopenharmony_ci#include <linux/random.h> 178c2ecf20Sopenharmony_ci#include <linux/limits.h> 188c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 198c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 208c2ecf20Sopenharmony_ci#include <linux/compat.h> 218c2ecf20Sopenharmony_ci#include <linux/elf-randomize.h> 228c2ecf20Sopenharmony_ci#include <asm/elf.h> 238c2ecf20Sopenharmony_ci#include <asm/io.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include "physaddr.h" 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_cistruct va_alignment __read_mostly va_align = { 288c2ecf20Sopenharmony_ci .flags = -1, 298c2ecf20Sopenharmony_ci}; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ciunsigned long task_size_32bit(void) 328c2ecf20Sopenharmony_ci{ 338c2ecf20Sopenharmony_ci return IA32_PAGE_OFFSET; 348c2ecf20Sopenharmony_ci} 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ciunsigned long task_size_64bit(int full_addr_space) 378c2ecf20Sopenharmony_ci{ 388c2ecf20Sopenharmony_ci return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW; 398c2ecf20Sopenharmony_ci} 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistatic unsigned long stack_maxrandom_size(unsigned long task_size) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci unsigned long max = 0; 448c2ecf20Sopenharmony_ci if (current->flags & PF_RANDOMIZE) { 458c2ecf20Sopenharmony_ci max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit()); 468c2ecf20Sopenharmony_ci max <<= PAGE_SHIFT; 478c2ecf20Sopenharmony_ci } 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci return max; 508c2ecf20Sopenharmony_ci} 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 538c2ecf20Sopenharmony_ci# define mmap32_rnd_bits mmap_rnd_compat_bits 548c2ecf20Sopenharmony_ci# define mmap64_rnd_bits mmap_rnd_bits 558c2ecf20Sopenharmony_ci#else 568c2ecf20Sopenharmony_ci# define mmap32_rnd_bits mmap_rnd_bits 578c2ecf20Sopenharmony_ci# define mmap64_rnd_bits mmap_rnd_bits 588c2ecf20Sopenharmony_ci#endif 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci#define SIZE_128M (128 * 1024 * 1024UL) 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_cistatic int mmap_is_legacy(void) 638c2ecf20Sopenharmony_ci{ 648c2ecf20Sopenharmony_ci if (current->personality & ADDR_COMPAT_LAYOUT) 658c2ecf20Sopenharmony_ci return 1; 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci return sysctl_legacy_va_layout; 688c2ecf20Sopenharmony_ci} 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic unsigned long arch_rnd(unsigned int rndbits) 718c2ecf20Sopenharmony_ci{ 728c2ecf20Sopenharmony_ci if (!(current->flags & PF_RANDOMIZE)) 738c2ecf20Sopenharmony_ci return 0; 748c2ecf20Sopenharmony_ci return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT; 758c2ecf20Sopenharmony_ci} 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ciunsigned long arch_mmap_rnd(void) 788c2ecf20Sopenharmony_ci{ 798c2ecf20Sopenharmony_ci return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_cistatic unsigned long mmap_base(unsigned long rnd, unsigned long task_size, 838c2ecf20Sopenharmony_ci struct rlimit *rlim_stack) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci unsigned long gap = rlim_stack->rlim_cur; 868c2ecf20Sopenharmony_ci unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; 878c2ecf20Sopenharmony_ci unsigned long gap_min, gap_max; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci /* Values close to RLIM_INFINITY can overflow. */ 908c2ecf20Sopenharmony_ci if (gap + pad > gap) 918c2ecf20Sopenharmony_ci gap += pad; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci /* 948c2ecf20Sopenharmony_ci * Top of mmap area (just below the process stack). 958c2ecf20Sopenharmony_ci * Leave an at least ~128 MB hole with possible stack randomization. 968c2ecf20Sopenharmony_ci */ 978c2ecf20Sopenharmony_ci gap_min = SIZE_128M; 988c2ecf20Sopenharmony_ci gap_max = (task_size / 6) * 5; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci if (gap < gap_min) 1018c2ecf20Sopenharmony_ci gap = gap_min; 1028c2ecf20Sopenharmony_ci else if (gap > gap_max) 1038c2ecf20Sopenharmony_ci gap = gap_max; 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci return PAGE_ALIGN(task_size - gap - rnd); 1068c2ecf20Sopenharmony_ci} 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_cistatic unsigned long mmap_legacy_base(unsigned long rnd, 1098c2ecf20Sopenharmony_ci unsigned long task_size) 1108c2ecf20Sopenharmony_ci{ 1118c2ecf20Sopenharmony_ci return __TASK_UNMAPPED_BASE(task_size) + rnd; 1128c2ecf20Sopenharmony_ci} 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci/* 1158c2ecf20Sopenharmony_ci * This function, called very early during the creation of a new 1168c2ecf20Sopenharmony_ci * process VM image, sets up which VM layout function to use: 1178c2ecf20Sopenharmony_ci */ 1188c2ecf20Sopenharmony_cistatic void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, 1198c2ecf20Sopenharmony_ci unsigned long random_factor, unsigned long task_size, 1208c2ecf20Sopenharmony_ci struct rlimit *rlim_stack) 1218c2ecf20Sopenharmony_ci{ 1228c2ecf20Sopenharmony_ci *legacy_base = mmap_legacy_base(random_factor, task_size); 1238c2ecf20Sopenharmony_ci if (mmap_is_legacy()) 1248c2ecf20Sopenharmony_ci *base = *legacy_base; 1258c2ecf20Sopenharmony_ci else 1268c2ecf20Sopenharmony_ci *base = mmap_base(random_factor, task_size, rlim_stack); 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_civoid arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci if (mmap_is_legacy()) 1328c2ecf20Sopenharmony_ci mm->get_unmapped_area = arch_get_unmapped_area; 1338c2ecf20Sopenharmony_ci else 1348c2ecf20Sopenharmony_ci mm->get_unmapped_area = arch_get_unmapped_area_topdown; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, 1378c2ecf20Sopenharmony_ci arch_rnd(mmap64_rnd_bits), task_size_64bit(0), 1388c2ecf20Sopenharmony_ci rlim_stack); 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES 1418c2ecf20Sopenharmony_ci /* 1428c2ecf20Sopenharmony_ci * The mmap syscall mapping base decision depends solely on the 1438c2ecf20Sopenharmony_ci * syscall type (64-bit or compat). This applies for 64bit 1448c2ecf20Sopenharmony_ci * applications and 32bit applications. The 64bit syscall uses 1458c2ecf20Sopenharmony_ci * mmap_base, the compat syscall uses mmap_compat_base. 1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ci arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, 1488c2ecf20Sopenharmony_ci arch_rnd(mmap32_rnd_bits), task_size_32bit(), 1498c2ecf20Sopenharmony_ci rlim_stack); 1508c2ecf20Sopenharmony_ci#endif 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ciunsigned long get_mmap_base(int is_legacy) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES 1588c2ecf20Sopenharmony_ci if (in_32bit_syscall()) { 1598c2ecf20Sopenharmony_ci return is_legacy ? mm->mmap_compat_legacy_base 1608c2ecf20Sopenharmony_ci : mm->mmap_compat_base; 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci#endif 1638c2ecf20Sopenharmony_ci return is_legacy ? mm->mmap_legacy_base : mm->mmap_base; 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ciconst char *arch_vma_name(struct vm_area_struct *vma) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci return NULL; 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci/** 1728c2ecf20Sopenharmony_ci * mmap_address_hint_valid - Validate the address hint of mmap 1738c2ecf20Sopenharmony_ci * @addr: Address hint 1748c2ecf20Sopenharmony_ci * @len: Mapping length 1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci * Check whether @addr and @addr + @len result in a valid mapping. 1778c2ecf20Sopenharmony_ci * 1788c2ecf20Sopenharmony_ci * On 32bit this only checks whether @addr + @len is <= TASK_SIZE. 1798c2ecf20Sopenharmony_ci * 1808c2ecf20Sopenharmony_ci * On 64bit with 5-level page tables another sanity check is required 1818c2ecf20Sopenharmony_ci * because mappings requested by mmap(@addr, 0) which cross the 47-bit 1828c2ecf20Sopenharmony_ci * virtual address boundary can cause the following theoretical issue: 1838c2ecf20Sopenharmony_ci * 1848c2ecf20Sopenharmony_ci * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr 1858c2ecf20Sopenharmony_ci * is below the border of the 47-bit address space and @addr + @len is 1868c2ecf20Sopenharmony_ci * above the border. 1878c2ecf20Sopenharmony_ci * 1888c2ecf20Sopenharmony_ci * With 4-level paging this request succeeds, but the resulting mapping 1898c2ecf20Sopenharmony_ci * address will always be within the 47-bit virtual address space, because 1908c2ecf20Sopenharmony_ci * the hint address does not result in a valid mapping and is 1918c2ecf20Sopenharmony_ci * ignored. Hence applications which are not prepared to handle virtual 1928c2ecf20Sopenharmony_ci * addresses above 47-bit work correctly. 1938c2ecf20Sopenharmony_ci * 1948c2ecf20Sopenharmony_ci * With 5-level paging this request would be granted and result in a 1958c2ecf20Sopenharmony_ci * mapping which crosses the border of the 47-bit virtual address 1968c2ecf20Sopenharmony_ci * space. If the application cannot handle addresses above 47-bit this 1978c2ecf20Sopenharmony_ci * will lead to misbehaviour and hard to diagnose failures. 1988c2ecf20Sopenharmony_ci * 1998c2ecf20Sopenharmony_ci * Therefore ignore address hints which would result in a mapping crossing 2008c2ecf20Sopenharmony_ci * the 47-bit virtual address boundary. 2018c2ecf20Sopenharmony_ci * 2028c2ecf20Sopenharmony_ci * Note, that in the same scenario with MAP_FIXED the behaviour is 2038c2ecf20Sopenharmony_ci * different. The request with @addr < 47-bit and @addr + @len > 47-bit 2048c2ecf20Sopenharmony_ci * fails on a 4-level paging machine but succeeds on a 5-level paging 2058c2ecf20Sopenharmony_ci * machine. It is reasonable to expect that an application does not rely on 2068c2ecf20Sopenharmony_ci * the failure of such a fixed mapping request, so the restriction is not 2078c2ecf20Sopenharmony_ci * applied. 2088c2ecf20Sopenharmony_ci */ 2098c2ecf20Sopenharmony_cibool mmap_address_hint_valid(unsigned long addr, unsigned long len) 2108c2ecf20Sopenharmony_ci{ 2118c2ecf20Sopenharmony_ci if (TASK_SIZE - len < addr) 2128c2ecf20Sopenharmony_ci return false; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW); 2158c2ecf20Sopenharmony_ci} 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci/* Can we access it for direct reading/writing? Must be RAM: */ 2188c2ecf20Sopenharmony_ciint valid_phys_addr_range(phys_addr_t addr, size_t count) 2198c2ecf20Sopenharmony_ci{ 2208c2ecf20Sopenharmony_ci return addr + count - 1 <= __pa(high_memory - 1); 2218c2ecf20Sopenharmony_ci} 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci/* Can we access it through mmap? Must be a valid physical address: */ 2248c2ecf20Sopenharmony_ciint valid_mmap_phys_addr_range(unsigned long pfn, size_t count) 2258c2ecf20Sopenharmony_ci{ 2268c2ecf20Sopenharmony_ci phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci return phys_addr_valid(addr + count - 1); 2298c2ecf20Sopenharmony_ci} 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci/* 2328c2ecf20Sopenharmony_ci * Only allow root to set high MMIO mappings to PROT_NONE. 2338c2ecf20Sopenharmony_ci * This prevents an unpriv. user to set them to PROT_NONE and invert 2348c2ecf20Sopenharmony_ci * them, then pointing to valid memory for L1TF speculation. 2358c2ecf20Sopenharmony_ci * 2368c2ecf20Sopenharmony_ci * Note: for locked down kernels may want to disable the root override. 2378c2ecf20Sopenharmony_ci */ 2388c2ecf20Sopenharmony_cibool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci if (!boot_cpu_has_bug(X86_BUG_L1TF)) 2418c2ecf20Sopenharmony_ci return true; 2428c2ecf20Sopenharmony_ci if (!__pte_needs_invert(pgprot_val(prot))) 2438c2ecf20Sopenharmony_ci return true; 2448c2ecf20Sopenharmony_ci /* If it's real memory always allow */ 2458c2ecf20Sopenharmony_ci if (pfn_valid(pfn)) 2468c2ecf20Sopenharmony_ci return true; 2478c2ecf20Sopenharmony_ci if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) 2488c2ecf20Sopenharmony_ci return false; 2498c2ecf20Sopenharmony_ci return true; 2508c2ecf20Sopenharmony_ci} 251