18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * PowerPC version 48c2ecf20Sopenharmony_ci * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Derived from "arch/i386/mm/fault.c" 78c2ecf20Sopenharmony_ci * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Modified by Cort Dougan and Paul Mackerras. 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/signal.h> 158c2ecf20Sopenharmony_ci#include <linux/sched.h> 168c2ecf20Sopenharmony_ci#include <linux/sched/task_stack.h> 178c2ecf20Sopenharmony_ci#include <linux/kernel.h> 188c2ecf20Sopenharmony_ci#include <linux/errno.h> 198c2ecf20Sopenharmony_ci#include <linux/string.h> 208c2ecf20Sopenharmony_ci#include <linux/types.h> 218c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 228c2ecf20Sopenharmony_ci#include <linux/ptrace.h> 238c2ecf20Sopenharmony_ci#include <linux/mman.h> 248c2ecf20Sopenharmony_ci#include <linux/mm.h> 258c2ecf20Sopenharmony_ci#include <linux/interrupt.h> 268c2ecf20Sopenharmony_ci#include <linux/highmem.h> 278c2ecf20Sopenharmony_ci#include <linux/extable.h> 288c2ecf20Sopenharmony_ci#include <linux/kprobes.h> 298c2ecf20Sopenharmony_ci#include <linux/kdebug.h> 308c2ecf20Sopenharmony_ci#include <linux/perf_event.h> 318c2ecf20Sopenharmony_ci#include <linux/ratelimit.h> 328c2ecf20Sopenharmony_ci#include <linux/context_tracking.h> 338c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 348c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci#include <asm/firmware.h> 378c2ecf20Sopenharmony_ci#include <asm/page.h> 388c2ecf20Sopenharmony_ci#include <asm/mmu.h> 398c2ecf20Sopenharmony_ci#include <asm/mmu_context.h> 408c2ecf20Sopenharmony_ci#include <asm/siginfo.h> 418c2ecf20Sopenharmony_ci#include <asm/debug.h> 428c2ecf20Sopenharmony_ci#include <asm/kup.h> 438c2ecf20Sopenharmony_ci#include <asm/inst.h> 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci/* 478c2ecf20Sopenharmony_ci * do_page_fault error handling helpers 488c2ecf20Sopenharmony_ci */ 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic int 518c2ecf20Sopenharmony_ci__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci /* 548c2ecf20Sopenharmony_ci * If we are in kernel mode, bail out with a SEGV, this will 558c2ecf20Sopenharmony_ci * be caught by the assembly which will restore the non-volatile 568c2ecf20Sopenharmony_ci * registers before calling bad_page_fault() 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_ci if (!user_mode(regs)) 598c2ecf20Sopenharmony_ci return SIGSEGV; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci _exception(SIGSEGV, regs, si_code, address); 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci return 0; 648c2ecf20Sopenharmony_ci} 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_cistatic noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) 678c2ecf20Sopenharmony_ci{ 688c2ecf20Sopenharmony_ci return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cistatic int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) 728c2ecf20Sopenharmony_ci{ 738c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci /* 768c2ecf20Sopenharmony_ci * Something tried to access memory that isn't in our memory map.. 778c2ecf20Sopenharmony_ci * Fix it, but check if it's kernel or user first.. 788c2ecf20Sopenharmony_ci */ 798c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci return __bad_area_nosemaphore(regs, address, si_code); 828c2ecf20Sopenharmony_ci} 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_cistatic noinline int bad_area(struct pt_regs *regs, unsigned long address) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci return __bad_area(regs, address, SEGV_MAPERR); 878c2ecf20Sopenharmony_ci} 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_MEM_KEYS 908c2ecf20Sopenharmony_cistatic noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, 918c2ecf20Sopenharmony_ci struct vm_area_struct *vma) 928c2ecf20Sopenharmony_ci{ 938c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 948c2ecf20Sopenharmony_ci int pkey; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci /* 978c2ecf20Sopenharmony_ci * We don't try to fetch the pkey from page table because reading 988c2ecf20Sopenharmony_ci * page table without locking doesn't guarantee stable pte value. 998c2ecf20Sopenharmony_ci * Hence the pkey value that we return to userspace can be different 1008c2ecf20Sopenharmony_ci * from the pkey that actually caused access error. 1018c2ecf20Sopenharmony_ci * 1028c2ecf20Sopenharmony_ci * It does *not* guarantee that the VMA we find here 1038c2ecf20Sopenharmony_ci * was the one that we faulted on. 1048c2ecf20Sopenharmony_ci * 1058c2ecf20Sopenharmony_ci * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); 1068c2ecf20Sopenharmony_ci * 2. T1 : set AMR to deny access to pkey=4, touches, page 1078c2ecf20Sopenharmony_ci * 3. T1 : faults... 1088c2ecf20Sopenharmony_ci * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); 1098c2ecf20Sopenharmony_ci * 5. T1 : enters fault handler, takes mmap_lock, etc... 1108c2ecf20Sopenharmony_ci * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really 1118c2ecf20Sopenharmony_ci * faulted on a pte with its pkey=4. 1128c2ecf20Sopenharmony_ci */ 1138c2ecf20Sopenharmony_ci pkey = vma_pkey(vma); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci /* 1188c2ecf20Sopenharmony_ci * If we are in kernel mode, bail out with a SEGV, this will 1198c2ecf20Sopenharmony_ci * be caught by the assembly which will restore the non-volatile 1208c2ecf20Sopenharmony_ci * registers before calling bad_page_fault() 1218c2ecf20Sopenharmony_ci */ 1228c2ecf20Sopenharmony_ci if (!user_mode(regs)) 1238c2ecf20Sopenharmony_ci return SIGSEGV; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci _exception_pkey(regs, address, pkey); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci return 0; 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci#endif 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_cistatic noinline int bad_access(struct pt_regs *regs, unsigned long address) 1328c2ecf20Sopenharmony_ci{ 1338c2ecf20Sopenharmony_ci return __bad_area(regs, address, SEGV_ACCERR); 1348c2ecf20Sopenharmony_ci} 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistatic int do_sigbus(struct pt_regs *regs, unsigned long address, 1378c2ecf20Sopenharmony_ci vm_fault_t fault) 1388c2ecf20Sopenharmony_ci{ 1398c2ecf20Sopenharmony_ci if (!user_mode(regs)) 1408c2ecf20Sopenharmony_ci return SIGBUS; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci current->thread.trap_nr = BUS_ADRERR; 1438c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 1448c2ecf20Sopenharmony_ci if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { 1458c2ecf20Sopenharmony_ci unsigned int lsb = 0; /* shutup gcc */ 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", 1488c2ecf20Sopenharmony_ci current->comm, current->pid, address); 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci if (fault & VM_FAULT_HWPOISON_LARGE) 1518c2ecf20Sopenharmony_ci lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); 1528c2ecf20Sopenharmony_ci if (fault & VM_FAULT_HWPOISON) 1538c2ecf20Sopenharmony_ci lsb = PAGE_SHIFT; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); 1568c2ecf20Sopenharmony_ci return 0; 1578c2ecf20Sopenharmony_ci } 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci#endif 1608c2ecf20Sopenharmony_ci force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); 1618c2ecf20Sopenharmony_ci return 0; 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_cistatic int mm_fault_error(struct pt_regs *regs, unsigned long addr, 1658c2ecf20Sopenharmony_ci vm_fault_t fault) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci /* 1688c2ecf20Sopenharmony_ci * Kernel page fault interrupted by SIGKILL. We have no reason to 1698c2ecf20Sopenharmony_ci * continue processing. 1708c2ecf20Sopenharmony_ci */ 1718c2ecf20Sopenharmony_ci if (fatal_signal_pending(current) && !user_mode(regs)) 1728c2ecf20Sopenharmony_ci return SIGKILL; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci /* Out of memory */ 1758c2ecf20Sopenharmony_ci if (fault & VM_FAULT_OOM) { 1768c2ecf20Sopenharmony_ci /* 1778c2ecf20Sopenharmony_ci * We ran out of memory, or some other thing happened to us that 1788c2ecf20Sopenharmony_ci * made us unable to handle the page fault gracefully. 1798c2ecf20Sopenharmony_ci */ 1808c2ecf20Sopenharmony_ci if (!user_mode(regs)) 1818c2ecf20Sopenharmony_ci return SIGSEGV; 1828c2ecf20Sopenharmony_ci pagefault_out_of_memory(); 1838c2ecf20Sopenharmony_ci } else { 1848c2ecf20Sopenharmony_ci if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 1858c2ecf20Sopenharmony_ci VM_FAULT_HWPOISON_LARGE)) 1868c2ecf20Sopenharmony_ci return do_sigbus(regs, addr, fault); 1878c2ecf20Sopenharmony_ci else if (fault & VM_FAULT_SIGSEGV) 1888c2ecf20Sopenharmony_ci return bad_area_nosemaphore(regs, addr); 1898c2ecf20Sopenharmony_ci else 1908c2ecf20Sopenharmony_ci BUG(); 1918c2ecf20Sopenharmony_ci } 1928c2ecf20Sopenharmony_ci return 0; 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci/* Is this a bad kernel fault ? */ 1968c2ecf20Sopenharmony_cistatic bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, 1978c2ecf20Sopenharmony_ci unsigned long address, bool is_write) 1988c2ecf20Sopenharmony_ci{ 1998c2ecf20Sopenharmony_ci int is_exec = TRAP(regs) == 0x400; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci if (is_exec) { 2028c2ecf20Sopenharmony_ci pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", 2038c2ecf20Sopenharmony_ci address >= TASK_SIZE ? "exec-protected" : "user", 2048c2ecf20Sopenharmony_ci address, 2058c2ecf20Sopenharmony_ci from_kuid(&init_user_ns, current_uid())); 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci // Kernel exec fault is always bad 2088c2ecf20Sopenharmony_ci return true; 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && 2128c2ecf20Sopenharmony_ci !search_exception_tables(regs->nip)) { 2138c2ecf20Sopenharmony_ci pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", 2148c2ecf20Sopenharmony_ci address, 2158c2ecf20Sopenharmony_ci from_kuid(&init_user_ns, current_uid())); 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci // Kernel fault on kernel address is bad 2198c2ecf20Sopenharmony_ci if (address >= TASK_SIZE) 2208c2ecf20Sopenharmony_ci return true; 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad 2238c2ecf20Sopenharmony_ci if (!search_exception_tables(regs->nip)) 2248c2ecf20Sopenharmony_ci return true; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci // Read/write fault in a valid region (the exception table search passed 2278c2ecf20Sopenharmony_ci // above), but blocked by KUAP is bad, it can never succeed. 2288c2ecf20Sopenharmony_ci if (bad_kuap_fault(regs, address, is_write)) 2298c2ecf20Sopenharmony_ci return true; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci // What's left? Kernel fault on user in well defined regions (extable 2328c2ecf20Sopenharmony_ci // matched), and allowed by KUAP in the faulting context. 2338c2ecf20Sopenharmony_ci return false; 2348c2ecf20Sopenharmony_ci} 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_MEM_KEYS 2378c2ecf20Sopenharmony_cistatic bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, 2388c2ecf20Sopenharmony_ci struct vm_area_struct *vma) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci /* 2418c2ecf20Sopenharmony_ci * Make sure to check the VMA so that we do not perform 2428c2ecf20Sopenharmony_ci * faults just to hit a pkey fault as soon as we fill in a 2438c2ecf20Sopenharmony_ci * page. Only called for current mm, hence foreign == 0 2448c2ecf20Sopenharmony_ci */ 2458c2ecf20Sopenharmony_ci if (!arch_vma_access_permitted(vma, is_write, is_exec, 0)) 2468c2ecf20Sopenharmony_ci return true; 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci return false; 2498c2ecf20Sopenharmony_ci} 2508c2ecf20Sopenharmony_ci#endif 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_cistatic bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci /* 2558c2ecf20Sopenharmony_ci * Allow execution from readable areas if the MMU does not 2568c2ecf20Sopenharmony_ci * provide separate controls over reading and executing. 2578c2ecf20Sopenharmony_ci * 2588c2ecf20Sopenharmony_ci * Note: That code used to not be enabled for 4xx/BookE. 2598c2ecf20Sopenharmony_ci * It is now as I/D cache coherency for these is done at 2608c2ecf20Sopenharmony_ci * set_pte_at() time and I see no reason why the test 2618c2ecf20Sopenharmony_ci * below wouldn't be valid on those processors. This -may- 2628c2ecf20Sopenharmony_ci * break programs compiled with a really old ABI though. 2638c2ecf20Sopenharmony_ci */ 2648c2ecf20Sopenharmony_ci if (is_exec) { 2658c2ecf20Sopenharmony_ci return !(vma->vm_flags & VM_EXEC) && 2668c2ecf20Sopenharmony_ci (cpu_has_feature(CPU_FTR_NOEXECUTE) || 2678c2ecf20Sopenharmony_ci !(vma->vm_flags & (VM_READ | VM_WRITE))); 2688c2ecf20Sopenharmony_ci } 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (is_write) { 2718c2ecf20Sopenharmony_ci if (unlikely(!(vma->vm_flags & VM_WRITE))) 2728c2ecf20Sopenharmony_ci return true; 2738c2ecf20Sopenharmony_ci return false; 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci if (unlikely(!vma_is_accessible(vma))) 2778c2ecf20Sopenharmony_ci return true; 2788c2ecf20Sopenharmony_ci /* 2798c2ecf20Sopenharmony_ci * We should ideally do the vma pkey access check here. But in the 2808c2ecf20Sopenharmony_ci * fault path, handle_mm_fault() also does the same check. To avoid 2818c2ecf20Sopenharmony_ci * these multiple checks, we skip it here and handle access error due 2828c2ecf20Sopenharmony_ci * to pkeys later. 2838c2ecf20Sopenharmony_ci */ 2848c2ecf20Sopenharmony_ci return false; 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_SMLPAR 2888c2ecf20Sopenharmony_cistatic inline void cmo_account_page_fault(void) 2898c2ecf20Sopenharmony_ci{ 2908c2ecf20Sopenharmony_ci if (firmware_has_feature(FW_FEATURE_CMO)) { 2918c2ecf20Sopenharmony_ci u32 page_ins; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci preempt_disable(); 2948c2ecf20Sopenharmony_ci page_ins = be32_to_cpu(get_lppaca()->page_ins); 2958c2ecf20Sopenharmony_ci page_ins += 1 << PAGE_FACTOR; 2968c2ecf20Sopenharmony_ci get_lppaca()->page_ins = cpu_to_be32(page_ins); 2978c2ecf20Sopenharmony_ci preempt_enable(); 2988c2ecf20Sopenharmony_ci } 2998c2ecf20Sopenharmony_ci} 3008c2ecf20Sopenharmony_ci#else 3018c2ecf20Sopenharmony_cistatic inline void cmo_account_page_fault(void) { } 3028c2ecf20Sopenharmony_ci#endif /* CONFIG_PPC_SMLPAR */ 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_cistatic void sanity_check_fault(bool is_write, bool is_user, 3058c2ecf20Sopenharmony_ci unsigned long error_code, unsigned long address) 3068c2ecf20Sopenharmony_ci{ 3078c2ecf20Sopenharmony_ci /* 3088c2ecf20Sopenharmony_ci * Userspace trying to access kernel address, we get PROTFAULT for that. 3098c2ecf20Sopenharmony_ci */ 3108c2ecf20Sopenharmony_ci if (is_user && address >= TASK_SIZE) { 3118c2ecf20Sopenharmony_ci if ((long)address == -1) 3128c2ecf20Sopenharmony_ci return; 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n", 3158c2ecf20Sopenharmony_ci current->comm, current->pid, address, 3168c2ecf20Sopenharmony_ci from_kuid(&init_user_ns, current_uid())); 3178c2ecf20Sopenharmony_ci return; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) 3218c2ecf20Sopenharmony_ci return; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci /* 3248c2ecf20Sopenharmony_ci * For hash translation mode, we should never get a 3258c2ecf20Sopenharmony_ci * PROTFAULT. Any update to pte to reduce access will result in us 3268c2ecf20Sopenharmony_ci * removing the hash page table entry, thus resulting in a DSISR_NOHPTE 3278c2ecf20Sopenharmony_ci * fault instead of DSISR_PROTFAULT. 3288c2ecf20Sopenharmony_ci * 3298c2ecf20Sopenharmony_ci * A pte update to relax the access will not result in a hash page table 3308c2ecf20Sopenharmony_ci * entry invalidate and hence can result in DSISR_PROTFAULT. 3318c2ecf20Sopenharmony_ci * ptep_set_access_flags() doesn't do a hpte flush. This is why we have 3328c2ecf20Sopenharmony_ci * the special !is_write in the below conditional. 3338c2ecf20Sopenharmony_ci * 3348c2ecf20Sopenharmony_ci * For platforms that doesn't supports coherent icache and do support 3358c2ecf20Sopenharmony_ci * per page noexec bit, we do setup things such that we do the 3368c2ecf20Sopenharmony_ci * sync between D/I cache via fault. But that is handled via low level 3378c2ecf20Sopenharmony_ci * hash fault code (hash_page_do_lazy_icache()) and we should not reach 3388c2ecf20Sopenharmony_ci * here in such case. 3398c2ecf20Sopenharmony_ci * 3408c2ecf20Sopenharmony_ci * For wrong access that can result in PROTFAULT, the above vma->vm_flags 3418c2ecf20Sopenharmony_ci * check should handle those and hence we should fall to the bad_area 3428c2ecf20Sopenharmony_ci * handling correctly. 3438c2ecf20Sopenharmony_ci * 3448c2ecf20Sopenharmony_ci * For embedded with per page exec support that doesn't support coherent 3458c2ecf20Sopenharmony_ci * icache we do get PROTFAULT and we handle that D/I cache sync in 3468c2ecf20Sopenharmony_ci * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON 3478c2ecf20Sopenharmony_ci * is conditional for server MMU. 3488c2ecf20Sopenharmony_ci * 3498c2ecf20Sopenharmony_ci * For radix, we can get prot fault for autonuma case, because radix 3508c2ecf20Sopenharmony_ci * page table will have them marked noaccess for user. 3518c2ecf20Sopenharmony_ci */ 3528c2ecf20Sopenharmony_ci if (radix_enabled() || is_write) 3538c2ecf20Sopenharmony_ci return; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci WARN_ON_ONCE(error_code & DSISR_PROTFAULT); 3568c2ecf20Sopenharmony_ci} 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci/* 3598c2ecf20Sopenharmony_ci * Define the correct "is_write" bit in error_code based 3608c2ecf20Sopenharmony_ci * on the processor family 3618c2ecf20Sopenharmony_ci */ 3628c2ecf20Sopenharmony_ci#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) 3638c2ecf20Sopenharmony_ci#define page_fault_is_write(__err) ((__err) & ESR_DST) 3648c2ecf20Sopenharmony_ci#define page_fault_is_bad(__err) (0) 3658c2ecf20Sopenharmony_ci#else 3668c2ecf20Sopenharmony_ci#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) 3678c2ecf20Sopenharmony_ci#if defined(CONFIG_PPC_8xx) 3688c2ecf20Sopenharmony_ci#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) 3698c2ecf20Sopenharmony_ci#elif defined(CONFIG_PPC64) 3708c2ecf20Sopenharmony_ci#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) 3718c2ecf20Sopenharmony_ci#else 3728c2ecf20Sopenharmony_ci#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) 3738c2ecf20Sopenharmony_ci#endif 3748c2ecf20Sopenharmony_ci#endif 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci/* 3778c2ecf20Sopenharmony_ci * For 600- and 800-family processors, the error_code parameter is DSISR 3788c2ecf20Sopenharmony_ci * for a data fault, SRR1 for an instruction fault. For 400-family processors 3798c2ecf20Sopenharmony_ci * the error_code parameter is ESR for a data fault, 0 for an instruction 3808c2ecf20Sopenharmony_ci * fault. 3818c2ecf20Sopenharmony_ci * For 64-bit processors, the error_code parameter is 3828c2ecf20Sopenharmony_ci * - DSISR for a non-SLB data access fault, 3838c2ecf20Sopenharmony_ci * - SRR1 & 0x08000000 for a non-SLB instruction access fault 3848c2ecf20Sopenharmony_ci * - 0 any SLB fault. 3858c2ecf20Sopenharmony_ci * 3868c2ecf20Sopenharmony_ci * The return value is 0 if the fault was handled, or the signal 3878c2ecf20Sopenharmony_ci * number if this is a kernel fault that can't be handled here. 3888c2ecf20Sopenharmony_ci */ 3898c2ecf20Sopenharmony_cistatic int __do_page_fault(struct pt_regs *regs, unsigned long address, 3908c2ecf20Sopenharmony_ci unsigned long error_code) 3918c2ecf20Sopenharmony_ci{ 3928c2ecf20Sopenharmony_ci struct vm_area_struct * vma; 3938c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 3948c2ecf20Sopenharmony_ci unsigned int flags = FAULT_FLAG_DEFAULT; 3958c2ecf20Sopenharmony_ci int is_exec = TRAP(regs) == 0x400; 3968c2ecf20Sopenharmony_ci int is_user = user_mode(regs); 3978c2ecf20Sopenharmony_ci int is_write = page_fault_is_write(error_code); 3988c2ecf20Sopenharmony_ci vm_fault_t fault, major = 0; 3998c2ecf20Sopenharmony_ci bool kprobe_fault = kprobe_page_fault(regs, 11); 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) 4028c2ecf20Sopenharmony_ci return 0; 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci if (unlikely(page_fault_is_bad(error_code))) { 4058c2ecf20Sopenharmony_ci if (is_user) { 4068c2ecf20Sopenharmony_ci _exception(SIGBUS, regs, BUS_OBJERR, address); 4078c2ecf20Sopenharmony_ci return 0; 4088c2ecf20Sopenharmony_ci } 4098c2ecf20Sopenharmony_ci return SIGBUS; 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci /* Additional sanity check(s) */ 4138c2ecf20Sopenharmony_ci sanity_check_fault(is_write, is_user, error_code, address); 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci /* 4168c2ecf20Sopenharmony_ci * The kernel should never take an execute fault nor should it 4178c2ecf20Sopenharmony_ci * take a page fault to a kernel address or a page fault to a user 4188c2ecf20Sopenharmony_ci * address outside of dedicated places 4198c2ecf20Sopenharmony_ci */ 4208c2ecf20Sopenharmony_ci if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) 4218c2ecf20Sopenharmony_ci return SIGSEGV; 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci /* 4248c2ecf20Sopenharmony_ci * If we're in an interrupt, have no user context or are running 4258c2ecf20Sopenharmony_ci * in a region with pagefaults disabled then we must not take the fault 4268c2ecf20Sopenharmony_ci */ 4278c2ecf20Sopenharmony_ci if (unlikely(faulthandler_disabled() || !mm)) { 4288c2ecf20Sopenharmony_ci if (is_user) 4298c2ecf20Sopenharmony_ci printk_ratelimited(KERN_ERR "Page fault in user mode" 4308c2ecf20Sopenharmony_ci " with faulthandler_disabled()=%d" 4318c2ecf20Sopenharmony_ci " mm=%p\n", 4328c2ecf20Sopenharmony_ci faulthandler_disabled(), mm); 4338c2ecf20Sopenharmony_ci return bad_area_nosemaphore(regs, address); 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci /* We restore the interrupt state now */ 4378c2ecf20Sopenharmony_ci if (!arch_irq_disabled_regs(regs)) 4388c2ecf20Sopenharmony_ci local_irq_enable(); 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_ci perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci /* 4438c2ecf20Sopenharmony_ci * We want to do this outside mmap_lock, because reading code around nip 4448c2ecf20Sopenharmony_ci * can result in fault, which will cause a deadlock when called with 4458c2ecf20Sopenharmony_ci * mmap_lock held 4468c2ecf20Sopenharmony_ci */ 4478c2ecf20Sopenharmony_ci if (is_user) 4488c2ecf20Sopenharmony_ci flags |= FAULT_FLAG_USER; 4498c2ecf20Sopenharmony_ci if (is_write) 4508c2ecf20Sopenharmony_ci flags |= FAULT_FLAG_WRITE; 4518c2ecf20Sopenharmony_ci if (is_exec) 4528c2ecf20Sopenharmony_ci flags |= FAULT_FLAG_INSTRUCTION; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci /* When running in the kernel we expect faults to occur only to 4558c2ecf20Sopenharmony_ci * addresses in user space. All other faults represent errors in the 4568c2ecf20Sopenharmony_ci * kernel and should generate an OOPS. Unfortunately, in the case of an 4578c2ecf20Sopenharmony_ci * erroneous fault occurring in a code path which already holds mmap_lock 4588c2ecf20Sopenharmony_ci * we will deadlock attempting to validate the fault against the 4598c2ecf20Sopenharmony_ci * address space. Luckily the kernel only validly references user 4608c2ecf20Sopenharmony_ci * space from well defined areas of code, which are listed in the 4618c2ecf20Sopenharmony_ci * exceptions table. 4628c2ecf20Sopenharmony_ci * 4638c2ecf20Sopenharmony_ci * As the vast majority of faults will be valid we will only perform 4648c2ecf20Sopenharmony_ci * the source reference check when there is a possibility of a deadlock. 4658c2ecf20Sopenharmony_ci * Attempt to lock the address space, if we cannot we then validate the 4668c2ecf20Sopenharmony_ci * source. If this is invalid we can skip the address space check, 4678c2ecf20Sopenharmony_ci * thus avoiding the deadlock. 4688c2ecf20Sopenharmony_ci */ 4698c2ecf20Sopenharmony_ci if (unlikely(!mmap_read_trylock(mm))) { 4708c2ecf20Sopenharmony_ci if (!is_user && !search_exception_tables(regs->nip)) 4718c2ecf20Sopenharmony_ci return bad_area_nosemaphore(regs, address); 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ciretry: 4748c2ecf20Sopenharmony_ci mmap_read_lock(mm); 4758c2ecf20Sopenharmony_ci } else { 4768c2ecf20Sopenharmony_ci /* 4778c2ecf20Sopenharmony_ci * The above down_read_trylock() might have succeeded in 4788c2ecf20Sopenharmony_ci * which case we'll have missed the might_sleep() from 4798c2ecf20Sopenharmony_ci * down_read(): 4808c2ecf20Sopenharmony_ci */ 4818c2ecf20Sopenharmony_ci might_sleep(); 4828c2ecf20Sopenharmony_ci } 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci vma = find_vma(mm, address); 4858c2ecf20Sopenharmony_ci if (unlikely(!vma)) 4868c2ecf20Sopenharmony_ci return bad_area(regs, address); 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci if (unlikely(vma->vm_start > address)) { 4898c2ecf20Sopenharmony_ci if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) 4908c2ecf20Sopenharmony_ci return bad_area(regs, address); 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci if (unlikely(expand_stack(vma, address))) 4938c2ecf20Sopenharmony_ci return bad_area(regs, address); 4948c2ecf20Sopenharmony_ci } 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_MEM_KEYS 4978c2ecf20Sopenharmony_ci if (unlikely(access_pkey_error(is_write, is_exec, 4988c2ecf20Sopenharmony_ci (error_code & DSISR_KEYFAULT), vma))) 4998c2ecf20Sopenharmony_ci return bad_access_pkey(regs, address, vma); 5008c2ecf20Sopenharmony_ci#endif /* CONFIG_PPC_MEM_KEYS */ 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci if (unlikely(access_error(is_write, is_exec, vma))) 5038c2ecf20Sopenharmony_ci return bad_access(regs, address); 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci /* 5068c2ecf20Sopenharmony_ci * If for any reason at all we couldn't handle the fault, 5078c2ecf20Sopenharmony_ci * make sure we exit gracefully rather than endlessly redo 5088c2ecf20Sopenharmony_ci * the fault. 5098c2ecf20Sopenharmony_ci */ 5108c2ecf20Sopenharmony_ci fault = handle_mm_fault(vma, address, flags, regs); 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci major |= fault & VM_FAULT_MAJOR; 5138c2ecf20Sopenharmony_ci 5148c2ecf20Sopenharmony_ci if (fault_signal_pending(fault, regs)) 5158c2ecf20Sopenharmony_ci return user_mode(regs) ? 0 : SIGBUS; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci /* 5188c2ecf20Sopenharmony_ci * Handle the retry right now, the mmap_lock has been released in that 5198c2ecf20Sopenharmony_ci * case. 5208c2ecf20Sopenharmony_ci */ 5218c2ecf20Sopenharmony_ci if (unlikely(fault & VM_FAULT_RETRY)) { 5228c2ecf20Sopenharmony_ci if (flags & FAULT_FLAG_ALLOW_RETRY) { 5238c2ecf20Sopenharmony_ci flags |= FAULT_FLAG_TRIED; 5248c2ecf20Sopenharmony_ci goto retry; 5258c2ecf20Sopenharmony_ci } 5268c2ecf20Sopenharmony_ci } 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci mmap_read_unlock(current->mm); 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci if (unlikely(fault & VM_FAULT_ERROR)) 5318c2ecf20Sopenharmony_ci return mm_fault_error(regs, address, fault); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci /* 5348c2ecf20Sopenharmony_ci * Major/minor page fault accounting. 5358c2ecf20Sopenharmony_ci */ 5368c2ecf20Sopenharmony_ci if (major) 5378c2ecf20Sopenharmony_ci cmo_account_page_fault(); 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci return 0; 5408c2ecf20Sopenharmony_ci} 5418c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(__do_page_fault); 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ciint do_page_fault(struct pt_regs *regs, unsigned long address, 5448c2ecf20Sopenharmony_ci unsigned long error_code) 5458c2ecf20Sopenharmony_ci{ 5468c2ecf20Sopenharmony_ci enum ctx_state prev_state = exception_enter(); 5478c2ecf20Sopenharmony_ci int rc = __do_page_fault(regs, address, error_code); 5488c2ecf20Sopenharmony_ci exception_exit(prev_state); 5498c2ecf20Sopenharmony_ci return rc; 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(do_page_fault); 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci/* 5548c2ecf20Sopenharmony_ci * bad_page_fault is called when we have a bad access from the kernel. 5558c2ecf20Sopenharmony_ci * It is called from the DSI and ISI handlers in head.S and from some 5568c2ecf20Sopenharmony_ci * of the procedures in traps.c. 5578c2ecf20Sopenharmony_ci */ 5588c2ecf20Sopenharmony_civoid bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) 5598c2ecf20Sopenharmony_ci{ 5608c2ecf20Sopenharmony_ci const struct exception_table_entry *entry; 5618c2ecf20Sopenharmony_ci int is_write = page_fault_is_write(regs->dsisr); 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci /* Are we prepared to handle this fault? */ 5648c2ecf20Sopenharmony_ci if ((entry = search_exception_tables(regs->nip)) != NULL) { 5658c2ecf20Sopenharmony_ci regs->nip = extable_fixup(entry); 5668c2ecf20Sopenharmony_ci return; 5678c2ecf20Sopenharmony_ci } 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci /* kernel has accessed a bad area */ 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci switch (TRAP(regs)) { 5728c2ecf20Sopenharmony_ci case 0x300: 5738c2ecf20Sopenharmony_ci case 0x380: 5748c2ecf20Sopenharmony_ci case 0xe00: 5758c2ecf20Sopenharmony_ci pr_alert("BUG: %s on %s at 0x%08lx\n", 5768c2ecf20Sopenharmony_ci regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : 5778c2ecf20Sopenharmony_ci "Unable to handle kernel data access", 5788c2ecf20Sopenharmony_ci is_write ? "write" : "read", regs->dar); 5798c2ecf20Sopenharmony_ci break; 5808c2ecf20Sopenharmony_ci case 0x400: 5818c2ecf20Sopenharmony_ci case 0x480: 5828c2ecf20Sopenharmony_ci pr_alert("BUG: Unable to handle kernel instruction fetch%s", 5838c2ecf20Sopenharmony_ci regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); 5848c2ecf20Sopenharmony_ci break; 5858c2ecf20Sopenharmony_ci case 0x600: 5868c2ecf20Sopenharmony_ci pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", 5878c2ecf20Sopenharmony_ci regs->dar); 5888c2ecf20Sopenharmony_ci break; 5898c2ecf20Sopenharmony_ci default: 5908c2ecf20Sopenharmony_ci pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n", 5918c2ecf20Sopenharmony_ci regs->dar); 5928c2ecf20Sopenharmony_ci break; 5938c2ecf20Sopenharmony_ci } 5948c2ecf20Sopenharmony_ci printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", 5958c2ecf20Sopenharmony_ci regs->nip); 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci if (task_stack_end_corrupted(current)) 5988c2ecf20Sopenharmony_ci printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_ci die("Kernel access of bad area", regs, sig); 6018c2ecf20Sopenharmony_ci} 602