// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/tlbflush.h>

extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

	do {
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			pr_alert("(bad)");
			break;
		}

		p4d = p4d_offset(pgd, addr);
		pud = pud_offset(p4d, addr);
		pmd = pmd_offset(pud, addr);
#if PTRS_PER_PMD != 1
		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_alert("(bad)");
			break;
		}

		if (IS_ENABLED(CONFIG_HIGHMEM)) {
			pte_t *pte;
			/* We must not map this if we have highmem enabled */
			pte = pte_offset_map(pmd, addr);
			pr_alert(", *pte=%08lx", pte_val(*pte));
			pte_unmap(pte);
		}
	} while (0);

	pr_alert("\n");
}

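/*
 * Handle a hardware page fault.  'entry' identifies the exception
 * entry that trapped (PTE not present, TLB misc or general exception),
 * 'addr' is the faulting virtual address, 'error_code' carries the
 * instruction-fetch and exception-type bits, and 'regs' is the saved
 * register state at the time of the fault.
 */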
void do_page_fault(unsigned long entry, unsigned long addr,
		   unsigned int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	vm_fault_t fault;
	unsigned int mask = VM_ACCESS_FLAGS;
	unsigned int flags = FAULT_FLAG_DEFAULT;

	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (addr >= TASK_SIZE) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;

		if (addr >= TASK_SIZE && addr < VMALLOC_END
		    && (entry == ENTRY_PTE_NOT_PRESENT))
			goto vmalloc_fault;
		else
			goto no_context;
	}

	/* Send a signal to the task for handling the unaligned access. */
	if (entry == ENTRY_GENERAL_EXCPETION
	    && error_code == ETYPE_ALIGNMENT_CHECK) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto no_context;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (unlikely(!mmap_read_trylock(mm))) {
		if (!user_mode(regs) &&
		    !search_exception_tables(instruction_pointer(regs)))
			goto no_context;
retry:
		mmap_read_lock(mm);
	} else {
		/*
		 * The above mmap_read_trylock() might have succeeded,
		 * in which case we'll have missed the might_sleep()
		 * from mmap_read_lock().
		 */
		might_sleep();
		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
			if (!user_mode(regs) &&
			    !search_exception_tables(instruction_pointer(regs)))
				goto no_context;
		}
	}

	vma = find_vma(mm, addr);

	if (unlikely(!vma))
		goto bad_area;

	if (vma->vm_start <= addr)
		goto good_area;

	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;

	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */
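	/*
	 * Map the exception entry and error code to the vm_flags
	 * permission this access needs: a not-present fault on an
	 * instruction fetch requires VM_EXEC, a not-present data access
	 * requires VM_READ or VM_WRITE, and each TLB "misc" protection
	 * violation selects the matching permission bit.  Write-type
	 * faults additionally set FAULT_FLAG_WRITE for handle_mm_fault().
	 */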
	if (entry == ENTRY_PTE_NOT_PRESENT) {
		if (error_code & ITYPE_mskINST)
			mask = VM_EXEC;
		else
			mask = VM_READ | VM_WRITE;
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {
		case RD_PROT:
			mask = VM_READ;
			break;
		case WRT_PROT:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case NOEXEC:
			mask = VM_EXEC;
			break;
		case PAGE_MODIFY:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case ACC_BIT:
			BUG();
		default:
			break;
		}

	}
	if (!(vma->vm_flags & mask))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else
			goto bad_area;
	}

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_RETRY) {
			flags |= FAULT_FLAG_TRIED;

			/* No need to mmap_read_unlock(mm) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	mmap_read_unlock(mm);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	mmap_read_unlock(mm);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		tsk->thread.address = addr;
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = entry;
		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	{
		const struct exception_table_entry *entry;

		if ((entry =
		     search_exception_tables(instruction_pointer(regs))) !=
		    NULL) {
			/* Adjust the instruction pointer in the stackframe */
			instruction_pointer(regs) = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	return;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/*
	 * Send a sigbus
	 */
	tsk->thread.address = addr;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = entry;
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);

	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use the pgd from the hardware L1 page-table base
		 * (NDS32_SR_L1_PPTB) instead of tsk->active_mm->pgd,
		 * since the latter might be unavailable if this code
		 * is executed in an unfortunately timed irq (like
		 * inside schedule() between switch_mm and
		 * switch_to...).
		 */

		unsigned int index = pgd_index(addr);
		pgd_t *pgd, *pgd_k;
		p4d_t *p4d, *p4d_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;

		if (!pmd_present(*pmd))
			set_pmd(pmd, *pmd_k);
		else
			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

		/*
		 * Since the vmalloc area is global, we don't
		 * need to copy individual PTEs, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}