162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. 462306a36Sopenharmony_ci * Lennox Wu <lennox.wu@sunplusct.com> 562306a36Sopenharmony_ci * Chen Liqin <liqin.chen@sunplusct.com> 662306a36Sopenharmony_ci * Copyright (C) 2012 Regents of the University of California 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/mm.h> 1162306a36Sopenharmony_ci#include <linux/kernel.h> 1262306a36Sopenharmony_ci#include <linux/interrupt.h> 1362306a36Sopenharmony_ci#include <linux/perf_event.h> 1462306a36Sopenharmony_ci#include <linux/signal.h> 1562306a36Sopenharmony_ci#include <linux/uaccess.h> 1662306a36Sopenharmony_ci#include <linux/kprobes.h> 1762306a36Sopenharmony_ci#include <linux/kfence.h> 1862306a36Sopenharmony_ci#include <linux/entry-common.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <asm/ptrace.h> 2162306a36Sopenharmony_ci#include <asm/tlbflush.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#include "../kernel/head.h" 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistatic void die_kernel_fault(const char *msg, unsigned long addr, 2662306a36Sopenharmony_ci struct pt_regs *regs) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci bust_spinlocks(1); 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg, 3162306a36Sopenharmony_ci addr); 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci bust_spinlocks(0); 3462306a36Sopenharmony_ci die(regs, "Oops"); 3562306a36Sopenharmony_ci make_task_dead(SIGKILL); 3662306a36Sopenharmony_ci} 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic inline void no_context(struct pt_regs *regs, unsigned long addr) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci const char *msg; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci /* Are we prepared to handle this kernel fault? */ 4362306a36Sopenharmony_ci if (fixup_exception(regs)) 4462306a36Sopenharmony_ci return; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci /* 4762306a36Sopenharmony_ci * Oops. The kernel tried to access some bad page. We'll have to 4862306a36Sopenharmony_ci * terminate things with extreme prejudice. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_ci if (addr < PAGE_SIZE) 5162306a36Sopenharmony_ci msg = "NULL pointer dereference"; 5262306a36Sopenharmony_ci else { 5362306a36Sopenharmony_ci if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs)) 5462306a36Sopenharmony_ci return; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci msg = "paging request"; 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci die_kernel_fault(msg, addr, regs); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci if (fault & VM_FAULT_OOM) { 6562306a36Sopenharmony_ci /* 6662306a36Sopenharmony_ci * We ran out of memory, call the OOM killer, and return the userspace 6762306a36Sopenharmony_ci * (which will retry the fault, or kill us if we got oom-killed). 6862306a36Sopenharmony_ci */ 6962306a36Sopenharmony_ci if (!user_mode(regs)) { 7062306a36Sopenharmony_ci no_context(regs, addr); 7162306a36Sopenharmony_ci return; 7262306a36Sopenharmony_ci } 7362306a36Sopenharmony_ci pagefault_out_of_memory(); 7462306a36Sopenharmony_ci return; 7562306a36Sopenharmony_ci } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { 7662306a36Sopenharmony_ci /* Kernel mode? Handle exceptions or die */ 7762306a36Sopenharmony_ci if (!user_mode(regs)) { 7862306a36Sopenharmony_ci no_context(regs, addr); 7962306a36Sopenharmony_ci return; 8062306a36Sopenharmony_ci } 8162306a36Sopenharmony_ci do_trap(regs, SIGBUS, BUS_ADRERR, addr); 8262306a36Sopenharmony_ci return; 8362306a36Sopenharmony_ci } 8462306a36Sopenharmony_ci BUG(); 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic inline void 8862306a36Sopenharmony_cibad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci /* 9162306a36Sopenharmony_ci * Something tried to access memory that isn't in our memory map. 9262306a36Sopenharmony_ci * Fix it, but check if it's kernel or user first. 9362306a36Sopenharmony_ci */ 9462306a36Sopenharmony_ci /* User mode accesses just cause a SIGSEGV */ 9562306a36Sopenharmony_ci if (user_mode(regs)) { 9662306a36Sopenharmony_ci do_trap(regs, SIGSEGV, code, addr); 9762306a36Sopenharmony_ci return; 9862306a36Sopenharmony_ci } 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci no_context(regs, addr); 10162306a36Sopenharmony_ci} 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_cistatic inline void 10462306a36Sopenharmony_cibad_area(struct pt_regs *regs, struct mm_struct *mm, int code, 10562306a36Sopenharmony_ci unsigned long addr) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci mmap_read_unlock(mm); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci bad_area_nosemaphore(regs, code, addr); 11062306a36Sopenharmony_ci} 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_cistatic inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci pgd_t *pgd, *pgd_k; 11562306a36Sopenharmony_ci pud_t *pud_k; 11662306a36Sopenharmony_ci p4d_t *p4d_k; 11762306a36Sopenharmony_ci pmd_t *pmd_k; 11862306a36Sopenharmony_ci pte_t *pte_k; 11962306a36Sopenharmony_ci int index; 12062306a36Sopenharmony_ci unsigned long pfn; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* User mode accesses just cause a SIGSEGV */ 12362306a36Sopenharmony_ci if (user_mode(regs)) 12462306a36Sopenharmony_ci return do_trap(regs, SIGSEGV, code, addr); 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci /* 12762306a36Sopenharmony_ci * Synchronize this task's top level page-table 12862306a36Sopenharmony_ci * with the 'reference' page table. 12962306a36Sopenharmony_ci * 13062306a36Sopenharmony_ci * Do _not_ use "tsk->active_mm->pgd" here. 13162306a36Sopenharmony_ci * We might be inside an interrupt in the middle 13262306a36Sopenharmony_ci * of a task switch. 13362306a36Sopenharmony_ci */ 13462306a36Sopenharmony_ci index = pgd_index(addr); 13562306a36Sopenharmony_ci pfn = csr_read(CSR_SATP) & SATP_PPN; 13662306a36Sopenharmony_ci pgd = (pgd_t *)pfn_to_virt(pfn) + index; 13762306a36Sopenharmony_ci pgd_k = init_mm.pgd + index; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci if (!pgd_present(*pgd_k)) { 14062306a36Sopenharmony_ci no_context(regs, addr); 14162306a36Sopenharmony_ci return; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci set_pgd(pgd, *pgd_k); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci p4d_k = p4d_offset(pgd_k, addr); 14662306a36Sopenharmony_ci if (!p4d_present(*p4d_k)) { 14762306a36Sopenharmony_ci no_context(regs, addr); 14862306a36Sopenharmony_ci return; 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci pud_k = pud_offset(p4d_k, addr); 15262306a36Sopenharmony_ci if (!pud_present(*pud_k)) { 15362306a36Sopenharmony_ci no_context(regs, addr); 15462306a36Sopenharmony_ci return; 15562306a36Sopenharmony_ci } 15662306a36Sopenharmony_ci if (pud_leaf(*pud_k)) 15762306a36Sopenharmony_ci goto flush_tlb; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci /* 16062306a36Sopenharmony_ci * Since the vmalloc area is global, it is unnecessary 16162306a36Sopenharmony_ci * to copy individual PTEs 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci pmd_k = pmd_offset(pud_k, addr); 16462306a36Sopenharmony_ci if (!pmd_present(*pmd_k)) { 16562306a36Sopenharmony_ci no_context(regs, addr); 16662306a36Sopenharmony_ci return; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci if (pmd_leaf(*pmd_k)) 16962306a36Sopenharmony_ci goto flush_tlb; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci /* 17262306a36Sopenharmony_ci * Make sure the actual PTE exists as well to 17362306a36Sopenharmony_ci * catch kernel vmalloc-area accesses to non-mapped 17462306a36Sopenharmony_ci * addresses. If we don't do this, this will just 17562306a36Sopenharmony_ci * silently loop forever. 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci pte_k = pte_offset_kernel(pmd_k, addr); 17862306a36Sopenharmony_ci if (!pte_present(*pte_k)) { 17962306a36Sopenharmony_ci no_context(regs, addr); 18062306a36Sopenharmony_ci return; 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci /* 18462306a36Sopenharmony_ci * The kernel assumes that TLBs don't cache invalid 18562306a36Sopenharmony_ci * entries, but in RISC-V, SFENCE.VMA specifies an 18662306a36Sopenharmony_ci * ordering constraint, not a cache flush; it is 18762306a36Sopenharmony_ci * necessary even after writing invalid entries. 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_ciflush_tlb: 19062306a36Sopenharmony_ci local_flush_tlb_page(addr); 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic inline bool access_error(unsigned long cause, struct vm_area_struct *vma) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci switch (cause) { 19662306a36Sopenharmony_ci case EXC_INST_PAGE_FAULT: 19762306a36Sopenharmony_ci if (!(vma->vm_flags & VM_EXEC)) { 19862306a36Sopenharmony_ci return true; 19962306a36Sopenharmony_ci } 20062306a36Sopenharmony_ci break; 20162306a36Sopenharmony_ci case EXC_LOAD_PAGE_FAULT: 20262306a36Sopenharmony_ci /* Write implies read */ 20362306a36Sopenharmony_ci if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { 20462306a36Sopenharmony_ci return true; 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci break; 20762306a36Sopenharmony_ci case EXC_STORE_PAGE_FAULT: 20862306a36Sopenharmony_ci if (!(vma->vm_flags & VM_WRITE)) { 20962306a36Sopenharmony_ci return true; 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci break; 21262306a36Sopenharmony_ci default: 21362306a36Sopenharmony_ci panic("%s: unhandled cause %lu", __func__, cause); 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci return false; 21662306a36Sopenharmony_ci} 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci/* 21962306a36Sopenharmony_ci * This routine handles page faults. It determines the address and the 22062306a36Sopenharmony_ci * problem, and then passes it off to one of the appropriate routines. 22162306a36Sopenharmony_ci */ 22262306a36Sopenharmony_civoid handle_page_fault(struct pt_regs *regs) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci struct task_struct *tsk; 22562306a36Sopenharmony_ci struct vm_area_struct *vma; 22662306a36Sopenharmony_ci struct mm_struct *mm; 22762306a36Sopenharmony_ci unsigned long addr, cause; 22862306a36Sopenharmony_ci unsigned int flags = FAULT_FLAG_DEFAULT; 22962306a36Sopenharmony_ci int code = SEGV_MAPERR; 23062306a36Sopenharmony_ci vm_fault_t fault; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci cause = regs->cause; 23362306a36Sopenharmony_ci addr = regs->badaddr; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci tsk = current; 23662306a36Sopenharmony_ci mm = tsk->mm; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci if (kprobe_page_fault(regs, cause)) 23962306a36Sopenharmony_ci return; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci /* 24262306a36Sopenharmony_ci * Fault-in kernel-space virtual memory on-demand. 24362306a36Sopenharmony_ci * The 'reference' page table is init_mm.pgd. 24462306a36Sopenharmony_ci * 24562306a36Sopenharmony_ci * NOTE! We MUST NOT take any locks for this case. We may 24662306a36Sopenharmony_ci * be in an interrupt or a critical region, and should 24762306a36Sopenharmony_ci * only copy the information from the master page table, 24862306a36Sopenharmony_ci * nothing more. 24962306a36Sopenharmony_ci */ 25062306a36Sopenharmony_ci if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) && 25162306a36Sopenharmony_ci unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) { 25262306a36Sopenharmony_ci vmalloc_fault(regs, code, addr); 25362306a36Sopenharmony_ci return; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci /* Enable interrupts if they were enabled in the parent context. */ 25762306a36Sopenharmony_ci if (!regs_irqs_disabled(regs)) 25862306a36Sopenharmony_ci local_irq_enable(); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci /* 26162306a36Sopenharmony_ci * If we're in an interrupt, have no user context, or are running 26262306a36Sopenharmony_ci * in an atomic region, then we must not take the fault. 26362306a36Sopenharmony_ci */ 26462306a36Sopenharmony_ci if (unlikely(faulthandler_disabled() || !mm)) { 26562306a36Sopenharmony_ci tsk->thread.bad_cause = cause; 26662306a36Sopenharmony_ci no_context(regs, addr); 26762306a36Sopenharmony_ci return; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci if (user_mode(regs)) 27162306a36Sopenharmony_ci flags |= FAULT_FLAG_USER; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) { 27462306a36Sopenharmony_ci if (fixup_exception(regs)) 27562306a36Sopenharmony_ci return; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci die_kernel_fault("access to user memory without uaccess routines", addr, regs); 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci if (cause == EXC_STORE_PAGE_FAULT) 28362306a36Sopenharmony_ci flags |= FAULT_FLAG_WRITE; 28462306a36Sopenharmony_ci else if (cause == EXC_INST_PAGE_FAULT) 28562306a36Sopenharmony_ci flags |= FAULT_FLAG_INSTRUCTION; 28662306a36Sopenharmony_ci if (!(flags & FAULT_FLAG_USER)) 28762306a36Sopenharmony_ci goto lock_mmap; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci vma = lock_vma_under_rcu(mm, addr); 29062306a36Sopenharmony_ci if (!vma) 29162306a36Sopenharmony_ci goto lock_mmap; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci if (unlikely(access_error(cause, vma))) { 29462306a36Sopenharmony_ci vma_end_read(vma); 29562306a36Sopenharmony_ci goto lock_mmap; 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); 29962306a36Sopenharmony_ci if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) 30062306a36Sopenharmony_ci vma_end_read(vma); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci if (!(fault & VM_FAULT_RETRY)) { 30362306a36Sopenharmony_ci count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 30462306a36Sopenharmony_ci goto done; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci count_vm_vma_lock_event(VMA_LOCK_RETRY); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (fault_signal_pending(fault, regs)) { 30962306a36Sopenharmony_ci if (!user_mode(regs)) 31062306a36Sopenharmony_ci no_context(regs, addr); 31162306a36Sopenharmony_ci return; 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_cilock_mmap: 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ciretry: 31662306a36Sopenharmony_ci vma = lock_mm_and_find_vma(mm, addr, regs); 31762306a36Sopenharmony_ci if (unlikely(!vma)) { 31862306a36Sopenharmony_ci tsk->thread.bad_cause = cause; 31962306a36Sopenharmony_ci bad_area_nosemaphore(regs, code, addr); 32062306a36Sopenharmony_ci return; 32162306a36Sopenharmony_ci } 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* 32462306a36Sopenharmony_ci * Ok, we have a good vm_area for this memory access, so 32562306a36Sopenharmony_ci * we can handle it. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci code = SEGV_ACCERR; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci if (unlikely(access_error(cause, vma))) { 33062306a36Sopenharmony_ci tsk->thread.bad_cause = cause; 33162306a36Sopenharmony_ci bad_area(regs, mm, code, addr); 33262306a36Sopenharmony_ci return; 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci /* 33662306a36Sopenharmony_ci * If for any reason at all we could not handle the fault, 33762306a36Sopenharmony_ci * make sure we exit gracefully rather than endlessly redo 33862306a36Sopenharmony_ci * the fault. 33962306a36Sopenharmony_ci */ 34062306a36Sopenharmony_ci fault = handle_mm_fault(vma, addr, flags, regs); 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci /* 34362306a36Sopenharmony_ci * If we need to retry but a fatal signal is pending, handle the 34462306a36Sopenharmony_ci * signal first. We do not need to release the mmap_lock because it 34562306a36Sopenharmony_ci * would already be released in __lock_page_or_retry in mm/filemap.c. 34662306a36Sopenharmony_ci */ 34762306a36Sopenharmony_ci if (fault_signal_pending(fault, regs)) { 34862306a36Sopenharmony_ci if (!user_mode(regs)) 34962306a36Sopenharmony_ci no_context(regs, addr); 35062306a36Sopenharmony_ci return; 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci /* The fault is fully completed (including releasing mmap lock) */ 35462306a36Sopenharmony_ci if (fault & VM_FAULT_COMPLETED) 35562306a36Sopenharmony_ci return; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci if (unlikely(fault & VM_FAULT_RETRY)) { 35862306a36Sopenharmony_ci flags |= FAULT_FLAG_TRIED; 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci /* 36162306a36Sopenharmony_ci * No need to mmap_read_unlock(mm) as we would 36262306a36Sopenharmony_ci * have already released it in __lock_page_or_retry 36362306a36Sopenharmony_ci * in mm/filemap.c. 36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci goto retry; 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci mmap_read_unlock(mm); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_cidone: 37162306a36Sopenharmony_ci if (unlikely(fault & VM_FAULT_ERROR)) { 37262306a36Sopenharmony_ci tsk->thread.bad_cause = cause; 37362306a36Sopenharmony_ci mm_fault_error(regs, addr, fault); 37462306a36Sopenharmony_ci return; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci return; 37762306a36Sopenharmony_ci} 378