18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  S390 version
48c2ecf20Sopenharmony_ci *    Copyright IBM Corp. 1999
58c2ecf20Sopenharmony_ci *    Author(s): Hartmut Penner (hp@de.ibm.com)
68c2ecf20Sopenharmony_ci *               Ulrich Weigand (uweigand@de.ibm.com)
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci *  Derived from "arch/i386/mm/fault.c"
98c2ecf20Sopenharmony_ci *    Copyright (C) 1995  Linus Torvalds
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/kernel_stat.h>
138c2ecf20Sopenharmony_ci#include <linux/perf_event.h>
148c2ecf20Sopenharmony_ci#include <linux/signal.h>
158c2ecf20Sopenharmony_ci#include <linux/sched.h>
168c2ecf20Sopenharmony_ci#include <linux/sched/debug.h>
178c2ecf20Sopenharmony_ci#include <linux/kernel.h>
188c2ecf20Sopenharmony_ci#include <linux/errno.h>
198c2ecf20Sopenharmony_ci#include <linux/string.h>
208c2ecf20Sopenharmony_ci#include <linux/types.h>
218c2ecf20Sopenharmony_ci#include <linux/ptrace.h>
228c2ecf20Sopenharmony_ci#include <linux/mman.h>
238c2ecf20Sopenharmony_ci#include <linux/mm.h>
248c2ecf20Sopenharmony_ci#include <linux/compat.h>
258c2ecf20Sopenharmony_ci#include <linux/smp.h>
268c2ecf20Sopenharmony_ci#include <linux/kdebug.h>
278c2ecf20Sopenharmony_ci#include <linux/init.h>
288c2ecf20Sopenharmony_ci#include <linux/console.h>
298c2ecf20Sopenharmony_ci#include <linux/extable.h>
308c2ecf20Sopenharmony_ci#include <linux/hardirq.h>
318c2ecf20Sopenharmony_ci#include <linux/kprobes.h>
328c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
338c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
348c2ecf20Sopenharmony_ci#include <asm/asm-offsets.h>
358c2ecf20Sopenharmony_ci#include <asm/diag.h>
368c2ecf20Sopenharmony_ci#include <asm/gmap.h>
378c2ecf20Sopenharmony_ci#include <asm/irq.h>
388c2ecf20Sopenharmony_ci#include <asm/mmu_context.h>
398c2ecf20Sopenharmony_ci#include <asm/facility.h>
408c2ecf20Sopenharmony_ci#include <asm/uv.h>
418c2ecf20Sopenharmony_ci#include "../kernel/entry.h"
428c2ecf20Sopenharmony_ci
/*
 * Mask that clears the low 12 bits of the translation-exception code,
 * leaving the page-aligned failing address.  Parenthesized so the
 * negative constant cannot combine with neighbouring tokens at the
 * expansion site.
 */
#define __FAIL_ADDR_MASK (-4096L)
/* NOTE(review): presumably selects the pfault interrupt subcode bits - confirm at the use site. */
#define __SUBCODE_MASK 0x0600
/* Value placed in the 'reserved' field of the pfault init request block. */
#define __PF_RES_FIELD 0x8000000000000000ULL

/*
 * Architecture-private fault results produced by do_exception() and
 * dispatched by do_fault_error():
 *   BADCONTEXT - kernel fault, fault handler disabled or no mm
 *   BADMAP     - no usable mapping for the address
 *   BADACCESS  - mapping exists but does not permit the access
 *   SIGNAL     - fault handling was interrupted by a pending signal
 *   PFAULT     - gmap fault deferred because RETRY_NOWAIT was requested
 */
#define VM_FAULT_BADCONTEXT	((__force vm_fault_t) 0x010000)
#define VM_FAULT_BADMAP		((__force vm_fault_t) 0x020000)
#define VM_FAULT_BADACCESS	((__force vm_fault_t) 0x040000)
#define VM_FAULT_SIGNAL		((__force vm_fault_t) 0x080000)
#define VM_FAULT_PFAULT		((__force vm_fault_t) 0x100000)
528c2ecf20Sopenharmony_ci
/*
 * Address-space context a fault is attributed to, as classified by
 * get_fault_type().  dump_fault_info() uses it to pick the ASCE whose
 * tables are dumped.
 */
enum fault_type {
	KERNEL_FAULT = 0,	/* resolved through the kernel ASCE */
	USER_FAULT,		/* resolved through the user ASCE */
	VDSO_FAULT,		/* resolved through the vdso ASCE */
	GMAP_FAULT,		/* resolved through the guest mapping's ASCE */
};
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_cistatic unsigned long store_indication __read_mostly;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_cistatic int __init fault_init(void)
638c2ecf20Sopenharmony_ci{
648c2ecf20Sopenharmony_ci	if (test_facility(75))
658c2ecf20Sopenharmony_ci		store_indication = 0xc00;
668c2ecf20Sopenharmony_ci	return 0;
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ciearly_initcall(fault_init);
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci/*
718c2ecf20Sopenharmony_ci * Find out which address space caused the exception.
728c2ecf20Sopenharmony_ci */
738c2ecf20Sopenharmony_cistatic enum fault_type get_fault_type(struct pt_regs *regs)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	unsigned long trans_exc_code;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	trans_exc_code = regs->int_parm_long & 3;
788c2ecf20Sopenharmony_ci	if (likely(trans_exc_code == 0)) {
798c2ecf20Sopenharmony_ci		/* primary space exception */
808c2ecf20Sopenharmony_ci		if (IS_ENABLED(CONFIG_PGSTE) &&
818c2ecf20Sopenharmony_ci		    test_pt_regs_flag(regs, PIF_GUEST_FAULT))
828c2ecf20Sopenharmony_ci			return GMAP_FAULT;
838c2ecf20Sopenharmony_ci		if (current->thread.mm_segment == USER_DS)
848c2ecf20Sopenharmony_ci			return USER_FAULT;
858c2ecf20Sopenharmony_ci		return KERNEL_FAULT;
868c2ecf20Sopenharmony_ci	}
878c2ecf20Sopenharmony_ci	if (trans_exc_code == 2) {
888c2ecf20Sopenharmony_ci		/* secondary space exception */
898c2ecf20Sopenharmony_ci		if (current->thread.mm_segment & 1) {
908c2ecf20Sopenharmony_ci			if (current->thread.mm_segment == USER_DS_SACF)
918c2ecf20Sopenharmony_ci				return USER_FAULT;
928c2ecf20Sopenharmony_ci			return KERNEL_FAULT;
938c2ecf20Sopenharmony_ci		}
948c2ecf20Sopenharmony_ci		return VDSO_FAULT;
958c2ecf20Sopenharmony_ci	}
968c2ecf20Sopenharmony_ci	if (trans_exc_code == 1) {
978c2ecf20Sopenharmony_ci		/* access register mode, not used in the kernel */
988c2ecf20Sopenharmony_ci		return USER_FAULT;
998c2ecf20Sopenharmony_ci	}
1008c2ecf20Sopenharmony_ci	/* home space exception -> access via kernel ASCE */
1018c2ecf20Sopenharmony_ci	return KERNEL_FAULT;
1028c2ecf20Sopenharmony_ci}
1038c2ecf20Sopenharmony_ci
/*
 * Probe whether an unsigned long can be read from @p without faulting.
 * Returns the result of get_kernel_nofault(): zero when the read
 * succeeded, non-zero otherwise.  The value read is discarded.
 */
static int bad_address(void *p)
{
	unsigned long *probe_addr = p;
	unsigned long scratch;

	return get_kernel_nofault(scratch, probe_addr);
}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_cistatic void dump_pagetable(unsigned long asce, unsigned long address)
1128c2ecf20Sopenharmony_ci{
1138c2ecf20Sopenharmony_ci	unsigned long *table = __va(asce & _ASCE_ORIGIN);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	pr_alert("AS:%016lx ", asce);
1168c2ecf20Sopenharmony_ci	switch (asce & _ASCE_TYPE_MASK) {
1178c2ecf20Sopenharmony_ci	case _ASCE_TYPE_REGION1:
1188c2ecf20Sopenharmony_ci		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
1198c2ecf20Sopenharmony_ci		if (bad_address(table))
1208c2ecf20Sopenharmony_ci			goto bad;
1218c2ecf20Sopenharmony_ci		pr_cont("R1:%016lx ", *table);
1228c2ecf20Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
1238c2ecf20Sopenharmony_ci			goto out;
1248c2ecf20Sopenharmony_ci		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
1258c2ecf20Sopenharmony_ci		fallthrough;
1268c2ecf20Sopenharmony_ci	case _ASCE_TYPE_REGION2:
1278c2ecf20Sopenharmony_ci		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
1288c2ecf20Sopenharmony_ci		if (bad_address(table))
1298c2ecf20Sopenharmony_ci			goto bad;
1308c2ecf20Sopenharmony_ci		pr_cont("R2:%016lx ", *table);
1318c2ecf20Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
1328c2ecf20Sopenharmony_ci			goto out;
1338c2ecf20Sopenharmony_ci		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
1348c2ecf20Sopenharmony_ci		fallthrough;
1358c2ecf20Sopenharmony_ci	case _ASCE_TYPE_REGION3:
1368c2ecf20Sopenharmony_ci		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
1378c2ecf20Sopenharmony_ci		if (bad_address(table))
1388c2ecf20Sopenharmony_ci			goto bad;
1398c2ecf20Sopenharmony_ci		pr_cont("R3:%016lx ", *table);
1408c2ecf20Sopenharmony_ci		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
1418c2ecf20Sopenharmony_ci			goto out;
1428c2ecf20Sopenharmony_ci		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
1438c2ecf20Sopenharmony_ci		fallthrough;
1448c2ecf20Sopenharmony_ci	case _ASCE_TYPE_SEGMENT:
1458c2ecf20Sopenharmony_ci		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
1468c2ecf20Sopenharmony_ci		if (bad_address(table))
1478c2ecf20Sopenharmony_ci			goto bad;
1488c2ecf20Sopenharmony_ci		pr_cont("S:%016lx ", *table);
1498c2ecf20Sopenharmony_ci		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
1508c2ecf20Sopenharmony_ci			goto out;
1518c2ecf20Sopenharmony_ci		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
1528c2ecf20Sopenharmony_ci	}
1538c2ecf20Sopenharmony_ci	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
1548c2ecf20Sopenharmony_ci	if (bad_address(table))
1558c2ecf20Sopenharmony_ci		goto bad;
1568c2ecf20Sopenharmony_ci	pr_cont("P:%016lx ", *table);
1578c2ecf20Sopenharmony_ciout:
1588c2ecf20Sopenharmony_ci	pr_cont("\n");
1598c2ecf20Sopenharmony_ci	return;
1608c2ecf20Sopenharmony_cibad:
1618c2ecf20Sopenharmony_ci	pr_cont("BAD\n");
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cistatic void dump_fault_info(struct pt_regs *regs)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	unsigned long asce;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	pr_alert("Failing address: %016lx TEID: %016lx\n",
1698c2ecf20Sopenharmony_ci		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
1708c2ecf20Sopenharmony_ci	pr_alert("Fault in ");
1718c2ecf20Sopenharmony_ci	switch (regs->int_parm_long & 3) {
1728c2ecf20Sopenharmony_ci	case 3:
1738c2ecf20Sopenharmony_ci		pr_cont("home space ");
1748c2ecf20Sopenharmony_ci		break;
1758c2ecf20Sopenharmony_ci	case 2:
1768c2ecf20Sopenharmony_ci		pr_cont("secondary space ");
1778c2ecf20Sopenharmony_ci		break;
1788c2ecf20Sopenharmony_ci	case 1:
1798c2ecf20Sopenharmony_ci		pr_cont("access register ");
1808c2ecf20Sopenharmony_ci		break;
1818c2ecf20Sopenharmony_ci	case 0:
1828c2ecf20Sopenharmony_ci		pr_cont("primary space ");
1838c2ecf20Sopenharmony_ci		break;
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci	pr_cont("mode while using ");
1868c2ecf20Sopenharmony_ci	switch (get_fault_type(regs)) {
1878c2ecf20Sopenharmony_ci	case USER_FAULT:
1888c2ecf20Sopenharmony_ci		asce = S390_lowcore.user_asce;
1898c2ecf20Sopenharmony_ci		pr_cont("user ");
1908c2ecf20Sopenharmony_ci		break;
1918c2ecf20Sopenharmony_ci	case VDSO_FAULT:
1928c2ecf20Sopenharmony_ci		asce = S390_lowcore.vdso_asce;
1938c2ecf20Sopenharmony_ci		pr_cont("vdso ");
1948c2ecf20Sopenharmony_ci		break;
1958c2ecf20Sopenharmony_ci	case GMAP_FAULT:
1968c2ecf20Sopenharmony_ci		asce = ((struct gmap *) S390_lowcore.gmap)->asce;
1978c2ecf20Sopenharmony_ci		pr_cont("gmap ");
1988c2ecf20Sopenharmony_ci		break;
1998c2ecf20Sopenharmony_ci	case KERNEL_FAULT:
2008c2ecf20Sopenharmony_ci		asce = S390_lowcore.kernel_asce;
2018c2ecf20Sopenharmony_ci		pr_cont("kernel ");
2028c2ecf20Sopenharmony_ci		break;
2038c2ecf20Sopenharmony_ci	default:
2048c2ecf20Sopenharmony_ci		unreachable();
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci	pr_cont("ASCE.\n");
2078c2ecf20Sopenharmony_ci	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
2088c2ecf20Sopenharmony_ci}
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ciint show_unhandled_signals = 1;
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_civoid report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
2138c2ecf20Sopenharmony_ci{
2148c2ecf20Sopenharmony_ci	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
2158c2ecf20Sopenharmony_ci		return;
2168c2ecf20Sopenharmony_ci	if (!unhandled_signal(current, signr))
2178c2ecf20Sopenharmony_ci		return;
2188c2ecf20Sopenharmony_ci	if (!printk_ratelimit())
2198c2ecf20Sopenharmony_ci		return;
2208c2ecf20Sopenharmony_ci	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
2218c2ecf20Sopenharmony_ci	       regs->int_code & 0xffff, regs->int_code >> 17);
2228c2ecf20Sopenharmony_ci	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
2238c2ecf20Sopenharmony_ci	printk(KERN_CONT "\n");
2248c2ecf20Sopenharmony_ci	if (is_mm_fault)
2258c2ecf20Sopenharmony_ci		dump_fault_info(regs);
2268c2ecf20Sopenharmony_ci	show_regs(regs);
2278c2ecf20Sopenharmony_ci}
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci/*
2308c2ecf20Sopenharmony_ci * Send SIGSEGV to task.  This is an external routine
2318c2ecf20Sopenharmony_ci * to keep the stack usage of do_page_fault small.
2328c2ecf20Sopenharmony_ci */
2338c2ecf20Sopenharmony_cistatic noinline void do_sigsegv(struct pt_regs *regs, int si_code)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	report_user_fault(regs, SIGSEGV, 1);
2368c2ecf20Sopenharmony_ci	force_sig_fault(SIGSEGV, si_code,
2378c2ecf20Sopenharmony_ci			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
2388c2ecf20Sopenharmony_ci}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ciconst struct exception_table_entry *s390_search_extables(unsigned long addr)
2418c2ecf20Sopenharmony_ci{
2428c2ecf20Sopenharmony_ci	const struct exception_table_entry *fixup;
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	fixup = search_extable(__start_dma_ex_table,
2458c2ecf20Sopenharmony_ci			       __stop_dma_ex_table - __start_dma_ex_table,
2468c2ecf20Sopenharmony_ci			       addr);
2478c2ecf20Sopenharmony_ci	if (!fixup)
2488c2ecf20Sopenharmony_ci		fixup = search_exception_tables(addr);
2498c2ecf20Sopenharmony_ci	return fixup;
2508c2ecf20Sopenharmony_ci}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_cistatic noinline void do_no_context(struct pt_regs *regs)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	const struct exception_table_entry *fixup;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	/* Are we prepared to handle this kernel fault?  */
2578c2ecf20Sopenharmony_ci	fixup = s390_search_extables(regs->psw.addr);
2588c2ecf20Sopenharmony_ci	if (fixup && ex_handle(fixup, regs))
2598c2ecf20Sopenharmony_ci		return;
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	/*
2628c2ecf20Sopenharmony_ci	 * Oops. The kernel tried to access some bad page. We'll have to
2638c2ecf20Sopenharmony_ci	 * terminate things with extreme prejudice.
2648c2ecf20Sopenharmony_ci	 */
2658c2ecf20Sopenharmony_ci	if (get_fault_type(regs) == KERNEL_FAULT)
2668c2ecf20Sopenharmony_ci		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
2678c2ecf20Sopenharmony_ci		       " in virtual kernel address space\n");
2688c2ecf20Sopenharmony_ci	else
2698c2ecf20Sopenharmony_ci		printk(KERN_ALERT "Unable to handle kernel paging request"
2708c2ecf20Sopenharmony_ci		       " in virtual user address space\n");
2718c2ecf20Sopenharmony_ci	dump_fault_info(regs);
2728c2ecf20Sopenharmony_ci	die(regs, "Oops");
2738c2ecf20Sopenharmony_ci	do_exit(SIGKILL);
2748c2ecf20Sopenharmony_ci}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_cistatic noinline void do_low_address(struct pt_regs *regs)
2778c2ecf20Sopenharmony_ci{
2788c2ecf20Sopenharmony_ci	/* Low-address protection hit in kernel mode means
2798c2ecf20Sopenharmony_ci	   NULL pointer write access in kernel mode.  */
2808c2ecf20Sopenharmony_ci	if (regs->psw.mask & PSW_MASK_PSTATE) {
2818c2ecf20Sopenharmony_ci		/* Low-address protection hit in user mode 'cannot happen'. */
2828c2ecf20Sopenharmony_ci		die (regs, "Low-address protection");
2838c2ecf20Sopenharmony_ci		do_exit(SIGKILL);
2848c2ecf20Sopenharmony_ci	}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	do_no_context(regs);
2878c2ecf20Sopenharmony_ci}
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_cistatic noinline void do_sigbus(struct pt_regs *regs)
2908c2ecf20Sopenharmony_ci{
2918c2ecf20Sopenharmony_ci	/*
2928c2ecf20Sopenharmony_ci	 * Send a sigbus, regardless of whether we were in kernel
2938c2ecf20Sopenharmony_ci	 * or user mode.
2948c2ecf20Sopenharmony_ci	 */
2958c2ecf20Sopenharmony_ci	force_sig_fault(SIGBUS, BUS_ADRERR,
2968c2ecf20Sopenharmony_ci			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
2978c2ecf20Sopenharmony_ci}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_cistatic noinline int signal_return(struct pt_regs *regs)
3008c2ecf20Sopenharmony_ci{
3018c2ecf20Sopenharmony_ci	u16 instruction;
3028c2ecf20Sopenharmony_ci	int rc;
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
3058c2ecf20Sopenharmony_ci	if (rc)
3068c2ecf20Sopenharmony_ci		return rc;
3078c2ecf20Sopenharmony_ci	if (instruction == 0x0a77) {
3088c2ecf20Sopenharmony_ci		set_pt_regs_flag(regs, PIF_SYSCALL);
3098c2ecf20Sopenharmony_ci		regs->int_code = 0x00040077;
3108c2ecf20Sopenharmony_ci		return 0;
3118c2ecf20Sopenharmony_ci	} else if (instruction == 0x0aad) {
3128c2ecf20Sopenharmony_ci		set_pt_regs_flag(regs, PIF_SYSCALL);
3138c2ecf20Sopenharmony_ci		regs->int_code = 0x000400ad;
3148c2ecf20Sopenharmony_ci		return 0;
3158c2ecf20Sopenharmony_ci	}
3168c2ecf20Sopenharmony_ci	return -EACCES;
3178c2ecf20Sopenharmony_ci}
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_cistatic noinline void do_fault_error(struct pt_regs *regs, int access,
3208c2ecf20Sopenharmony_ci					vm_fault_t fault)
3218c2ecf20Sopenharmony_ci{
3228c2ecf20Sopenharmony_ci	int si_code;
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	switch (fault) {
3258c2ecf20Sopenharmony_ci	case VM_FAULT_BADACCESS:
3268c2ecf20Sopenharmony_ci		if (access == VM_EXEC && signal_return(regs) == 0)
3278c2ecf20Sopenharmony_ci			break;
3288c2ecf20Sopenharmony_ci		fallthrough;
3298c2ecf20Sopenharmony_ci	case VM_FAULT_BADMAP:
3308c2ecf20Sopenharmony_ci		/* Bad memory access. Check if it is kernel or user space. */
3318c2ecf20Sopenharmony_ci		if (user_mode(regs)) {
3328c2ecf20Sopenharmony_ci			/* User mode accesses just cause a SIGSEGV */
3338c2ecf20Sopenharmony_ci			si_code = (fault == VM_FAULT_BADMAP) ?
3348c2ecf20Sopenharmony_ci				SEGV_MAPERR : SEGV_ACCERR;
3358c2ecf20Sopenharmony_ci			do_sigsegv(regs, si_code);
3368c2ecf20Sopenharmony_ci			break;
3378c2ecf20Sopenharmony_ci		}
3388c2ecf20Sopenharmony_ci		fallthrough;
3398c2ecf20Sopenharmony_ci	case VM_FAULT_BADCONTEXT:
3408c2ecf20Sopenharmony_ci	case VM_FAULT_PFAULT:
3418c2ecf20Sopenharmony_ci		do_no_context(regs);
3428c2ecf20Sopenharmony_ci		break;
3438c2ecf20Sopenharmony_ci	case VM_FAULT_SIGNAL:
3448c2ecf20Sopenharmony_ci		if (!user_mode(regs))
3458c2ecf20Sopenharmony_ci			do_no_context(regs);
3468c2ecf20Sopenharmony_ci		break;
3478c2ecf20Sopenharmony_ci	default: /* fault & VM_FAULT_ERROR */
3488c2ecf20Sopenharmony_ci		if (fault & VM_FAULT_OOM) {
3498c2ecf20Sopenharmony_ci			if (!user_mode(regs))
3508c2ecf20Sopenharmony_ci				do_no_context(regs);
3518c2ecf20Sopenharmony_ci			else
3528c2ecf20Sopenharmony_ci				pagefault_out_of_memory();
3538c2ecf20Sopenharmony_ci		} else if (fault & VM_FAULT_SIGSEGV) {
3548c2ecf20Sopenharmony_ci			/* Kernel mode? Handle exceptions or die */
3558c2ecf20Sopenharmony_ci			if (!user_mode(regs))
3568c2ecf20Sopenharmony_ci				do_no_context(regs);
3578c2ecf20Sopenharmony_ci			else
3588c2ecf20Sopenharmony_ci				do_sigsegv(regs, SEGV_MAPERR);
3598c2ecf20Sopenharmony_ci		} else if (fault & VM_FAULT_SIGBUS) {
3608c2ecf20Sopenharmony_ci			/* Kernel mode? Handle exceptions or die */
3618c2ecf20Sopenharmony_ci			if (!user_mode(regs))
3628c2ecf20Sopenharmony_ci				do_no_context(regs);
3638c2ecf20Sopenharmony_ci			else
3648c2ecf20Sopenharmony_ci				do_sigbus(regs);
3658c2ecf20Sopenharmony_ci		} else
3668c2ecf20Sopenharmony_ci			BUG();
3678c2ecf20Sopenharmony_ci		break;
3688c2ecf20Sopenharmony_ci	}
3698c2ecf20Sopenharmony_ci}
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci/*
3728c2ecf20Sopenharmony_ci * This routine handles page faults.  It determines the address,
3738c2ecf20Sopenharmony_ci * and the problem, and then passes it off to one of the appropriate
3748c2ecf20Sopenharmony_ci * routines.
3758c2ecf20Sopenharmony_ci *
3768c2ecf20Sopenharmony_ci * interruption code (int_code):
3778c2ecf20Sopenharmony_ci *   04       Protection           ->  Write-Protection  (suppression)
3788c2ecf20Sopenharmony_ci *   10       Segment translation  ->  Not present       (nullification)
3798c2ecf20Sopenharmony_ci *   11       Page translation     ->  Not present       (nullification)
3808c2ecf20Sopenharmony_ci *   3b       Region third trans.  ->  Not present       (nullification)
3818c2ecf20Sopenharmony_ci */
3828c2ecf20Sopenharmony_cistatic inline vm_fault_t do_exception(struct pt_regs *regs, int access)
3838c2ecf20Sopenharmony_ci{
3848c2ecf20Sopenharmony_ci	struct gmap *gmap;
3858c2ecf20Sopenharmony_ci	struct task_struct *tsk;
3868c2ecf20Sopenharmony_ci	struct mm_struct *mm;
3878c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
3888c2ecf20Sopenharmony_ci	enum fault_type type;
3898c2ecf20Sopenharmony_ci	unsigned long trans_exc_code;
3908c2ecf20Sopenharmony_ci	unsigned long address;
3918c2ecf20Sopenharmony_ci	unsigned int flags;
3928c2ecf20Sopenharmony_ci	vm_fault_t fault;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	tsk = current;
3958c2ecf20Sopenharmony_ci	/*
3968c2ecf20Sopenharmony_ci	 * The instruction that caused the program check has
3978c2ecf20Sopenharmony_ci	 * been nullified. Don't signal single step via SIGTRAP.
3988c2ecf20Sopenharmony_ci	 */
3998c2ecf20Sopenharmony_ci	clear_pt_regs_flag(regs, PIF_PER_TRAP);
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	if (kprobe_page_fault(regs, 14))
4028c2ecf20Sopenharmony_ci		return 0;
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	mm = tsk->mm;
4058c2ecf20Sopenharmony_ci	trans_exc_code = regs->int_parm_long;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	/*
4088c2ecf20Sopenharmony_ci	 * Verify that the fault happened in user space, that
4098c2ecf20Sopenharmony_ci	 * we are not in an interrupt and that there is a
4108c2ecf20Sopenharmony_ci	 * user context.
4118c2ecf20Sopenharmony_ci	 */
4128c2ecf20Sopenharmony_ci	fault = VM_FAULT_BADCONTEXT;
4138c2ecf20Sopenharmony_ci	type = get_fault_type(regs);
4148c2ecf20Sopenharmony_ci	switch (type) {
4158c2ecf20Sopenharmony_ci	case KERNEL_FAULT:
4168c2ecf20Sopenharmony_ci		goto out;
4178c2ecf20Sopenharmony_ci	case VDSO_FAULT:
4188c2ecf20Sopenharmony_ci		fault = VM_FAULT_BADMAP;
4198c2ecf20Sopenharmony_ci		goto out;
4208c2ecf20Sopenharmony_ci	case USER_FAULT:
4218c2ecf20Sopenharmony_ci	case GMAP_FAULT:
4228c2ecf20Sopenharmony_ci		if (faulthandler_disabled() || !mm)
4238c2ecf20Sopenharmony_ci			goto out;
4248c2ecf20Sopenharmony_ci		break;
4258c2ecf20Sopenharmony_ci	}
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	address = trans_exc_code & __FAIL_ADDR_MASK;
4288c2ecf20Sopenharmony_ci	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
4298c2ecf20Sopenharmony_ci	flags = FAULT_FLAG_DEFAULT;
4308c2ecf20Sopenharmony_ci	if (user_mode(regs))
4318c2ecf20Sopenharmony_ci		flags |= FAULT_FLAG_USER;
4328c2ecf20Sopenharmony_ci	if ((trans_exc_code & store_indication) == 0x400)
4338c2ecf20Sopenharmony_ci		access = VM_WRITE;
4348c2ecf20Sopenharmony_ci	if (access == VM_WRITE)
4358c2ecf20Sopenharmony_ci		flags |= FAULT_FLAG_WRITE;
4368c2ecf20Sopenharmony_ci	mmap_read_lock(mm);
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	gmap = NULL;
4398c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
4408c2ecf20Sopenharmony_ci		gmap = (struct gmap *) S390_lowcore.gmap;
4418c2ecf20Sopenharmony_ci		current->thread.gmap_addr = address;
4428c2ecf20Sopenharmony_ci		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
4438c2ecf20Sopenharmony_ci		current->thread.gmap_int_code = regs->int_code & 0xffff;
4448c2ecf20Sopenharmony_ci		address = __gmap_translate(gmap, address);
4458c2ecf20Sopenharmony_ci		if (address == -EFAULT) {
4468c2ecf20Sopenharmony_ci			fault = VM_FAULT_BADMAP;
4478c2ecf20Sopenharmony_ci			goto out_up;
4488c2ecf20Sopenharmony_ci		}
4498c2ecf20Sopenharmony_ci		if (gmap->pfault_enabled)
4508c2ecf20Sopenharmony_ci			flags |= FAULT_FLAG_RETRY_NOWAIT;
4518c2ecf20Sopenharmony_ci	}
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ciretry:
4548c2ecf20Sopenharmony_ci	fault = VM_FAULT_BADMAP;
4558c2ecf20Sopenharmony_ci	vma = find_vma(mm, address);
4568c2ecf20Sopenharmony_ci	if (!vma)
4578c2ecf20Sopenharmony_ci		goto out_up;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	if (unlikely(vma->vm_start > address)) {
4608c2ecf20Sopenharmony_ci		if (!(vma->vm_flags & VM_GROWSDOWN))
4618c2ecf20Sopenharmony_ci			goto out_up;
4628c2ecf20Sopenharmony_ci		if (expand_stack(vma, address))
4638c2ecf20Sopenharmony_ci			goto out_up;
4648c2ecf20Sopenharmony_ci	}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	/*
4678c2ecf20Sopenharmony_ci	 * Ok, we have a good vm_area for this memory access, so
4688c2ecf20Sopenharmony_ci	 * we can handle it..
4698c2ecf20Sopenharmony_ci	 */
4708c2ecf20Sopenharmony_ci	fault = VM_FAULT_BADACCESS;
4718c2ecf20Sopenharmony_ci	if (unlikely(!(vma->vm_flags & access)))
4728c2ecf20Sopenharmony_ci		goto out_up;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	if (is_vm_hugetlb_page(vma))
4758c2ecf20Sopenharmony_ci		address &= HPAGE_MASK;
4768c2ecf20Sopenharmony_ci	/*
4778c2ecf20Sopenharmony_ci	 * If for any reason at all we couldn't handle the fault,
4788c2ecf20Sopenharmony_ci	 * make sure we exit gracefully rather than endlessly redo
4798c2ecf20Sopenharmony_ci	 * the fault.
4808c2ecf20Sopenharmony_ci	 */
4818c2ecf20Sopenharmony_ci	fault = handle_mm_fault(vma, address, flags, regs);
4828c2ecf20Sopenharmony_ci	if (fault_signal_pending(fault, regs)) {
4838c2ecf20Sopenharmony_ci		fault = VM_FAULT_SIGNAL;
4848c2ecf20Sopenharmony_ci		if (flags & FAULT_FLAG_RETRY_NOWAIT)
4858c2ecf20Sopenharmony_ci			goto out_up;
4868c2ecf20Sopenharmony_ci		goto out;
4878c2ecf20Sopenharmony_ci	}
4888c2ecf20Sopenharmony_ci	if (unlikely(fault & VM_FAULT_ERROR))
4898c2ecf20Sopenharmony_ci		goto out_up;
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci	if (flags & FAULT_FLAG_ALLOW_RETRY) {
4928c2ecf20Sopenharmony_ci		if (fault & VM_FAULT_RETRY) {
4938c2ecf20Sopenharmony_ci			if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
4948c2ecf20Sopenharmony_ci			    (flags & FAULT_FLAG_RETRY_NOWAIT)) {
4958c2ecf20Sopenharmony_ci				/* FAULT_FLAG_RETRY_NOWAIT has been set,
4968c2ecf20Sopenharmony_ci				 * mmap_lock has not been released */
4978c2ecf20Sopenharmony_ci				current->thread.gmap_pfault = 1;
4988c2ecf20Sopenharmony_ci				fault = VM_FAULT_PFAULT;
4998c2ecf20Sopenharmony_ci				goto out_up;
5008c2ecf20Sopenharmony_ci			}
5018c2ecf20Sopenharmony_ci			flags &= ~FAULT_FLAG_RETRY_NOWAIT;
5028c2ecf20Sopenharmony_ci			flags |= FAULT_FLAG_TRIED;
5038c2ecf20Sopenharmony_ci			mmap_read_lock(mm);
5048c2ecf20Sopenharmony_ci			goto retry;
5058c2ecf20Sopenharmony_ci		}
5068c2ecf20Sopenharmony_ci	}
5078c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
5088c2ecf20Sopenharmony_ci		address =  __gmap_link(gmap, current->thread.gmap_addr,
5098c2ecf20Sopenharmony_ci				       address);
5108c2ecf20Sopenharmony_ci		if (address == -EFAULT) {
5118c2ecf20Sopenharmony_ci			fault = VM_FAULT_BADMAP;
5128c2ecf20Sopenharmony_ci			goto out_up;
5138c2ecf20Sopenharmony_ci		}
5148c2ecf20Sopenharmony_ci		if (address == -ENOMEM) {
5158c2ecf20Sopenharmony_ci			fault = VM_FAULT_OOM;
5168c2ecf20Sopenharmony_ci			goto out_up;
5178c2ecf20Sopenharmony_ci		}
5188c2ecf20Sopenharmony_ci	}
5198c2ecf20Sopenharmony_ci	fault = 0;
5208c2ecf20Sopenharmony_ciout_up:
5218c2ecf20Sopenharmony_ci	mmap_read_unlock(mm);
5228c2ecf20Sopenharmony_ciout:
5238c2ecf20Sopenharmony_ci	return fault;
5248c2ecf20Sopenharmony_ci}
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_civoid do_protection_exception(struct pt_regs *regs)
5278c2ecf20Sopenharmony_ci{
5288c2ecf20Sopenharmony_ci	unsigned long trans_exc_code;
5298c2ecf20Sopenharmony_ci	int access;
5308c2ecf20Sopenharmony_ci	vm_fault_t fault;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	trans_exc_code = regs->int_parm_long;
5338c2ecf20Sopenharmony_ci	/*
5348c2ecf20Sopenharmony_ci	 * Protection exceptions are suppressing, decrement psw address.
5358c2ecf20Sopenharmony_ci	 * The exception to this rule are aborted transactions, for these
5368c2ecf20Sopenharmony_ci	 * the PSW already points to the correct location.
5378c2ecf20Sopenharmony_ci	 */
5388c2ecf20Sopenharmony_ci	if (!(regs->int_code & 0x200))
5398c2ecf20Sopenharmony_ci		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
5408c2ecf20Sopenharmony_ci	/*
5418c2ecf20Sopenharmony_ci	 * Check for low-address protection.  This needs to be treated
5428c2ecf20Sopenharmony_ci	 * as a special case because the translation exception code
5438c2ecf20Sopenharmony_ci	 * field is not guaranteed to contain valid data in this case.
5448c2ecf20Sopenharmony_ci	 */
5458c2ecf20Sopenharmony_ci	if (unlikely(!(trans_exc_code & 4))) {
5468c2ecf20Sopenharmony_ci		do_low_address(regs);
5478c2ecf20Sopenharmony_ci		return;
5488c2ecf20Sopenharmony_ci	}
5498c2ecf20Sopenharmony_ci	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
5508c2ecf20Sopenharmony_ci		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
5518c2ecf20Sopenharmony_ci					(regs->psw.addr & PAGE_MASK);
5528c2ecf20Sopenharmony_ci		access = VM_EXEC;
5538c2ecf20Sopenharmony_ci		fault = VM_FAULT_BADACCESS;
5548c2ecf20Sopenharmony_ci	} else {
5558c2ecf20Sopenharmony_ci		access = VM_WRITE;
5568c2ecf20Sopenharmony_ci		fault = do_exception(regs, access);
5578c2ecf20Sopenharmony_ci	}
5588c2ecf20Sopenharmony_ci	if (unlikely(fault))
5598c2ecf20Sopenharmony_ci		do_fault_error(regs, access, fault);
5608c2ecf20Sopenharmony_ci}
5618c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(do_protection_exception);
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_civoid do_dat_exception(struct pt_regs *regs)
5648c2ecf20Sopenharmony_ci{
5658c2ecf20Sopenharmony_ci	int access;
5668c2ecf20Sopenharmony_ci	vm_fault_t fault;
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	access = VM_ACCESS_FLAGS;
5698c2ecf20Sopenharmony_ci	fault = do_exception(regs, access);
5708c2ecf20Sopenharmony_ci	if (unlikely(fault))
5718c2ecf20Sopenharmony_ci		do_fault_error(regs, access, fault);
5728c2ecf20Sopenharmony_ci}
5738c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(do_dat_exception);
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci#ifdef CONFIG_PFAULT
5768c2ecf20Sopenharmony_ci/*
5778c2ecf20Sopenharmony_ci * 'pfault' pseudo page faults routines.
5788c2ecf20Sopenharmony_ci */
5798c2ecf20Sopenharmony_cistatic int pfault_disable;
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_cistatic int __init nopfault(char *str)
5828c2ecf20Sopenharmony_ci{
5838c2ecf20Sopenharmony_ci	pfault_disable = 1;
5848c2ecf20Sopenharmony_ci	return 1;
5858c2ecf20Sopenharmony_ci}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci__setup("nopfault", nopfault);
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_cistruct pfault_refbk {
5908c2ecf20Sopenharmony_ci	u16 refdiagc;
5918c2ecf20Sopenharmony_ci	u16 reffcode;
5928c2ecf20Sopenharmony_ci	u16 refdwlen;
5938c2ecf20Sopenharmony_ci	u16 refversn;
5948c2ecf20Sopenharmony_ci	u64 refgaddr;
5958c2ecf20Sopenharmony_ci	u64 refselmk;
5968c2ecf20Sopenharmony_ci	u64 refcmpmk;
5978c2ecf20Sopenharmony_ci	u64 reserved;
5988c2ecf20Sopenharmony_ci} __attribute__ ((packed, aligned(8)));
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_cistatic struct pfault_refbk pfault_init_refbk = {
6018c2ecf20Sopenharmony_ci	.refdiagc = 0x258,
6028c2ecf20Sopenharmony_ci	.reffcode = 0,
6038c2ecf20Sopenharmony_ci	.refdwlen = 5,
6048c2ecf20Sopenharmony_ci	.refversn = 2,
6058c2ecf20Sopenharmony_ci	.refgaddr = __LC_LPP,
6068c2ecf20Sopenharmony_ci	.refselmk = 1ULL << 48,
6078c2ecf20Sopenharmony_ci	.refcmpmk = 1ULL << 48,
6088c2ecf20Sopenharmony_ci	.reserved = __PF_RES_FIELD
6098c2ecf20Sopenharmony_ci};
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ciint pfault_init(void)
6128c2ecf20Sopenharmony_ci{
6138c2ecf20Sopenharmony_ci        int rc;
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	if (pfault_disable)
6168c2ecf20Sopenharmony_ci		return -1;
6178c2ecf20Sopenharmony_ci	diag_stat_inc(DIAG_STAT_X258);
6188c2ecf20Sopenharmony_ci	asm volatile(
6198c2ecf20Sopenharmony_ci		"	diag	%1,%0,0x258\n"
6208c2ecf20Sopenharmony_ci		"0:	j	2f\n"
6218c2ecf20Sopenharmony_ci		"1:	la	%0,8\n"
6228c2ecf20Sopenharmony_ci		"2:\n"
6238c2ecf20Sopenharmony_ci		EX_TABLE(0b,1b)
6248c2ecf20Sopenharmony_ci		: "=d" (rc)
6258c2ecf20Sopenharmony_ci		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
6268c2ecf20Sopenharmony_ci        return rc;
6278c2ecf20Sopenharmony_ci}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_cistatic struct pfault_refbk pfault_fini_refbk = {
6308c2ecf20Sopenharmony_ci	.refdiagc = 0x258,
6318c2ecf20Sopenharmony_ci	.reffcode = 1,
6328c2ecf20Sopenharmony_ci	.refdwlen = 5,
6338c2ecf20Sopenharmony_ci	.refversn = 2,
6348c2ecf20Sopenharmony_ci};
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_civoid pfault_fini(void)
6378c2ecf20Sopenharmony_ci{
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	if (pfault_disable)
6408c2ecf20Sopenharmony_ci		return;
6418c2ecf20Sopenharmony_ci	diag_stat_inc(DIAG_STAT_X258);
6428c2ecf20Sopenharmony_ci	asm volatile(
6438c2ecf20Sopenharmony_ci		"	diag	%0,0,0x258\n"
6448c2ecf20Sopenharmony_ci		"0:	nopr	%%r7\n"
6458c2ecf20Sopenharmony_ci		EX_TABLE(0b,0b)
6468c2ecf20Sopenharmony_ci		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
6478c2ecf20Sopenharmony_ci}
6488c2ecf20Sopenharmony_ci
/* Bit in the external interruption subcode that marks a completion signal. */
#define PF_COMPLETE	0x0080

/* Threads that were put to sleep by an initial pfault interrupt. */
static LIST_HEAD(pfault_list);
/* Taken around every access to pfault_list and thread.pfault_wait below. */
static DEFINE_SPINLOCK(pfault_lock);
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci/*
6558c2ecf20Sopenharmony_ci * The mechanism of our pfault code: if Linux is running as guest, runs a user
6568c2ecf20Sopenharmony_ci * space process and the user space process accesses a page that the host has
6578c2ecf20Sopenharmony_ci * paged out we get a pfault interrupt.
6588c2ecf20Sopenharmony_ci *
6598c2ecf20Sopenharmony_ci * This allows us, within the guest, to schedule a different process. Without
6608c2ecf20Sopenharmony_ci * this mechanism the host would have to suspend the whole virtual cpu until
6618c2ecf20Sopenharmony_ci * the page has been paged in.
6628c2ecf20Sopenharmony_ci *
6638c2ecf20Sopenharmony_ci * So when we get such an interrupt then we set the state of the current task
6648c2ecf20Sopenharmony_ci * to uninterruptible and also set the need_resched flag. Both happens within
6658c2ecf20Sopenharmony_ci * interrupt context(!). If we later on want to return to user space we
6668c2ecf20Sopenharmony_ci * recognize the need_resched flag and then call schedule().  It's not very
6678c2ecf20Sopenharmony_ci * obvious how this works...
6688c2ecf20Sopenharmony_ci *
6698c2ecf20Sopenharmony_ci * Of course we have a lot of additional fun with the completion interrupt (->
6708c2ecf20Sopenharmony_ci * host signals that a page of a process has been paged in and the process can
6718c2ecf20Sopenharmony_ci * continue to run). This interrupt can arrive on any cpu and, since we have
6728c2ecf20Sopenharmony_ci * virtual cpus, actually appear before the interrupt that signals that a page
6738c2ecf20Sopenharmony_ci * is missing.
6748c2ecf20Sopenharmony_ci */
6758c2ecf20Sopenharmony_cistatic void pfault_interrupt(struct ext_code ext_code,
6768c2ecf20Sopenharmony_ci			     unsigned int param32, unsigned long param64)
6778c2ecf20Sopenharmony_ci{
6788c2ecf20Sopenharmony_ci	struct task_struct *tsk;
6798c2ecf20Sopenharmony_ci	__u16 subcode;
6808c2ecf20Sopenharmony_ci	pid_t pid;
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	/*
6838c2ecf20Sopenharmony_ci	 * Get the external interruption subcode & pfault initial/completion
6848c2ecf20Sopenharmony_ci	 * signal bit. VM stores this in the 'cpu address' field associated
6858c2ecf20Sopenharmony_ci	 * with the external interrupt.
6868c2ecf20Sopenharmony_ci	 */
6878c2ecf20Sopenharmony_ci	subcode = ext_code.subcode;
6888c2ecf20Sopenharmony_ci	if ((subcode & 0xff00) != __SUBCODE_MASK)
6898c2ecf20Sopenharmony_ci		return;
6908c2ecf20Sopenharmony_ci	inc_irq_stat(IRQEXT_PFL);
6918c2ecf20Sopenharmony_ci	/* Get the token (= pid of the affected task). */
6928c2ecf20Sopenharmony_ci	pid = param64 & LPP_PID_MASK;
6938c2ecf20Sopenharmony_ci	rcu_read_lock();
6948c2ecf20Sopenharmony_ci	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
6958c2ecf20Sopenharmony_ci	if (tsk)
6968c2ecf20Sopenharmony_ci		get_task_struct(tsk);
6978c2ecf20Sopenharmony_ci	rcu_read_unlock();
6988c2ecf20Sopenharmony_ci	if (!tsk)
6998c2ecf20Sopenharmony_ci		return;
7008c2ecf20Sopenharmony_ci	spin_lock(&pfault_lock);
7018c2ecf20Sopenharmony_ci	if (subcode & PF_COMPLETE) {
7028c2ecf20Sopenharmony_ci		/* signal bit is set -> a page has been swapped in by VM */
7038c2ecf20Sopenharmony_ci		if (tsk->thread.pfault_wait == 1) {
7048c2ecf20Sopenharmony_ci			/* Initial interrupt was faster than the completion
7058c2ecf20Sopenharmony_ci			 * interrupt. pfault_wait is valid. Set pfault_wait
7068c2ecf20Sopenharmony_ci			 * back to zero and wake up the process. This can
7078c2ecf20Sopenharmony_ci			 * safely be done because the task is still sleeping
7088c2ecf20Sopenharmony_ci			 * and can't produce new pfaults. */
7098c2ecf20Sopenharmony_ci			tsk->thread.pfault_wait = 0;
7108c2ecf20Sopenharmony_ci			list_del(&tsk->thread.list);
7118c2ecf20Sopenharmony_ci			wake_up_process(tsk);
7128c2ecf20Sopenharmony_ci			put_task_struct(tsk);
7138c2ecf20Sopenharmony_ci		} else {
7148c2ecf20Sopenharmony_ci			/* Completion interrupt was faster than initial
7158c2ecf20Sopenharmony_ci			 * interrupt. Set pfault_wait to -1 so the initial
7168c2ecf20Sopenharmony_ci			 * interrupt doesn't put the task to sleep.
7178c2ecf20Sopenharmony_ci			 * If the task is not running, ignore the completion
7188c2ecf20Sopenharmony_ci			 * interrupt since it must be a leftover of a PFAULT
7198c2ecf20Sopenharmony_ci			 * CANCEL operation which didn't remove all pending
7208c2ecf20Sopenharmony_ci			 * completion interrupts. */
7218c2ecf20Sopenharmony_ci			if (tsk->state == TASK_RUNNING)
7228c2ecf20Sopenharmony_ci				tsk->thread.pfault_wait = -1;
7238c2ecf20Sopenharmony_ci		}
7248c2ecf20Sopenharmony_ci	} else {
7258c2ecf20Sopenharmony_ci		/* signal bit not set -> a real page is missing. */
7268c2ecf20Sopenharmony_ci		if (WARN_ON_ONCE(tsk != current))
7278c2ecf20Sopenharmony_ci			goto out;
7288c2ecf20Sopenharmony_ci		if (tsk->thread.pfault_wait == 1) {
7298c2ecf20Sopenharmony_ci			/* Already on the list with a reference: put to sleep */
7308c2ecf20Sopenharmony_ci			goto block;
7318c2ecf20Sopenharmony_ci		} else if (tsk->thread.pfault_wait == -1) {
7328c2ecf20Sopenharmony_ci			/* Completion interrupt was faster than the initial
7338c2ecf20Sopenharmony_ci			 * interrupt (pfault_wait == -1). Set pfault_wait
7348c2ecf20Sopenharmony_ci			 * back to zero and exit. */
7358c2ecf20Sopenharmony_ci			tsk->thread.pfault_wait = 0;
7368c2ecf20Sopenharmony_ci		} else {
7378c2ecf20Sopenharmony_ci			/* Initial interrupt arrived before completion
7388c2ecf20Sopenharmony_ci			 * interrupt. Let the task sleep.
7398c2ecf20Sopenharmony_ci			 * An extra task reference is needed since a different
7408c2ecf20Sopenharmony_ci			 * cpu may set the task state to TASK_RUNNING again
7418c2ecf20Sopenharmony_ci			 * before the scheduler is reached. */
7428c2ecf20Sopenharmony_ci			get_task_struct(tsk);
7438c2ecf20Sopenharmony_ci			tsk->thread.pfault_wait = 1;
7448c2ecf20Sopenharmony_ci			list_add(&tsk->thread.list, &pfault_list);
7458c2ecf20Sopenharmony_ciblock:
7468c2ecf20Sopenharmony_ci			/* Since this must be a userspace fault, there
7478c2ecf20Sopenharmony_ci			 * is no kernel task state to trample. Rely on the
7488c2ecf20Sopenharmony_ci			 * return to userspace schedule() to block. */
7498c2ecf20Sopenharmony_ci			__set_current_state(TASK_UNINTERRUPTIBLE);
7508c2ecf20Sopenharmony_ci			set_tsk_need_resched(tsk);
7518c2ecf20Sopenharmony_ci			set_preempt_need_resched();
7528c2ecf20Sopenharmony_ci		}
7538c2ecf20Sopenharmony_ci	}
7548c2ecf20Sopenharmony_ciout:
7558c2ecf20Sopenharmony_ci	spin_unlock(&pfault_lock);
7568c2ecf20Sopenharmony_ci	put_task_struct(tsk);
7578c2ecf20Sopenharmony_ci}
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_cistatic int pfault_cpu_dead(unsigned int cpu)
7608c2ecf20Sopenharmony_ci{
7618c2ecf20Sopenharmony_ci	struct thread_struct *thread, *next;
7628c2ecf20Sopenharmony_ci	struct task_struct *tsk;
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	spin_lock_irq(&pfault_lock);
7658c2ecf20Sopenharmony_ci	list_for_each_entry_safe(thread, next, &pfault_list, list) {
7668c2ecf20Sopenharmony_ci		thread->pfault_wait = 0;
7678c2ecf20Sopenharmony_ci		list_del(&thread->list);
7688c2ecf20Sopenharmony_ci		tsk = container_of(thread, struct task_struct, thread);
7698c2ecf20Sopenharmony_ci		wake_up_process(tsk);
7708c2ecf20Sopenharmony_ci		put_task_struct(tsk);
7718c2ecf20Sopenharmony_ci	}
7728c2ecf20Sopenharmony_ci	spin_unlock_irq(&pfault_lock);
7738c2ecf20Sopenharmony_ci	return 0;
7748c2ecf20Sopenharmony_ci}
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_cistatic int __init pfault_irq_init(void)
7778c2ecf20Sopenharmony_ci{
7788c2ecf20Sopenharmony_ci	int rc;
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
7818c2ecf20Sopenharmony_ci	if (rc)
7828c2ecf20Sopenharmony_ci		goto out_extint;
7838c2ecf20Sopenharmony_ci	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
7848c2ecf20Sopenharmony_ci	if (rc)
7858c2ecf20Sopenharmony_ci		goto out_pfault;
7868c2ecf20Sopenharmony_ci	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
7878c2ecf20Sopenharmony_ci	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
7888c2ecf20Sopenharmony_ci				  NULL, pfault_cpu_dead);
7898c2ecf20Sopenharmony_ci	return 0;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ciout_pfault:
7928c2ecf20Sopenharmony_ci	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
7938c2ecf20Sopenharmony_ciout_extint:
7948c2ecf20Sopenharmony_ci	pfault_disable = 1;
7958c2ecf20Sopenharmony_ci	return rc;
7968c2ecf20Sopenharmony_ci}
7978c2ecf20Sopenharmony_ciearly_initcall(pfault_irq_init);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci#endif /* CONFIG_PFAULT */
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_PGSTE)
8028c2ecf20Sopenharmony_civoid do_secure_storage_access(struct pt_regs *regs)
8038c2ecf20Sopenharmony_ci{
8048c2ecf20Sopenharmony_ci	unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
8058c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
8068c2ecf20Sopenharmony_ci	struct mm_struct *mm;
8078c2ecf20Sopenharmony_ci	struct page *page;
8088c2ecf20Sopenharmony_ci	int rc;
8098c2ecf20Sopenharmony_ci
8108c2ecf20Sopenharmony_ci	/*
8118c2ecf20Sopenharmony_ci	 * bit 61 tells us if the address is valid, if it's not we
8128c2ecf20Sopenharmony_ci	 * have a major problem and should stop the kernel or send a
8138c2ecf20Sopenharmony_ci	 * SIGSEGV to the process. Unfortunately bit 61 is not
8148c2ecf20Sopenharmony_ci	 * reliable without the misc UV feature so we need to check
8158c2ecf20Sopenharmony_ci	 * for that as well.
8168c2ecf20Sopenharmony_ci	 */
8178c2ecf20Sopenharmony_ci	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
8188c2ecf20Sopenharmony_ci	    !test_bit_inv(61, &regs->int_parm_long)) {
8198c2ecf20Sopenharmony_ci		/*
8208c2ecf20Sopenharmony_ci		 * When this happens, userspace did something that it
8218c2ecf20Sopenharmony_ci		 * was not supposed to do, e.g. branching into secure
8228c2ecf20Sopenharmony_ci		 * memory. Trigger a segmentation fault.
8238c2ecf20Sopenharmony_ci		 */
8248c2ecf20Sopenharmony_ci		if (user_mode(regs)) {
8258c2ecf20Sopenharmony_ci			send_sig(SIGSEGV, current, 0);
8268c2ecf20Sopenharmony_ci			return;
8278c2ecf20Sopenharmony_ci		}
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci		/*
8308c2ecf20Sopenharmony_ci		 * The kernel should never run into this case and we
8318c2ecf20Sopenharmony_ci		 * have no way out of this situation.
8328c2ecf20Sopenharmony_ci		 */
8338c2ecf20Sopenharmony_ci		panic("Unexpected PGM 0x3d with TEID bit 61=0");
8348c2ecf20Sopenharmony_ci	}
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	switch (get_fault_type(regs)) {
8378c2ecf20Sopenharmony_ci	case USER_FAULT:
8388c2ecf20Sopenharmony_ci		mm = current->mm;
8398c2ecf20Sopenharmony_ci		mmap_read_lock(mm);
8408c2ecf20Sopenharmony_ci		vma = find_vma(mm, addr);
8418c2ecf20Sopenharmony_ci		if (!vma) {
8428c2ecf20Sopenharmony_ci			mmap_read_unlock(mm);
8438c2ecf20Sopenharmony_ci			do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
8448c2ecf20Sopenharmony_ci			break;
8458c2ecf20Sopenharmony_ci		}
8468c2ecf20Sopenharmony_ci		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
8478c2ecf20Sopenharmony_ci		if (IS_ERR_OR_NULL(page)) {
8488c2ecf20Sopenharmony_ci			mmap_read_unlock(mm);
8498c2ecf20Sopenharmony_ci			break;
8508c2ecf20Sopenharmony_ci		}
8518c2ecf20Sopenharmony_ci		if (arch_make_page_accessible(page))
8528c2ecf20Sopenharmony_ci			send_sig(SIGSEGV, current, 0);
8538c2ecf20Sopenharmony_ci		put_page(page);
8548c2ecf20Sopenharmony_ci		mmap_read_unlock(mm);
8558c2ecf20Sopenharmony_ci		break;
8568c2ecf20Sopenharmony_ci	case KERNEL_FAULT:
8578c2ecf20Sopenharmony_ci		page = phys_to_page(addr);
8588c2ecf20Sopenharmony_ci		if (unlikely(!try_get_page(page)))
8598c2ecf20Sopenharmony_ci			break;
8608c2ecf20Sopenharmony_ci		rc = arch_make_page_accessible(page);
8618c2ecf20Sopenharmony_ci		put_page(page);
8628c2ecf20Sopenharmony_ci		if (rc)
8638c2ecf20Sopenharmony_ci			BUG();
8648c2ecf20Sopenharmony_ci		break;
8658c2ecf20Sopenharmony_ci	case VDSO_FAULT:
8668c2ecf20Sopenharmony_ci	case GMAP_FAULT:
8678c2ecf20Sopenharmony_ci	default:
8688c2ecf20Sopenharmony_ci		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
8698c2ecf20Sopenharmony_ci		WARN_ON_ONCE(1);
8708c2ecf20Sopenharmony_ci	}
8718c2ecf20Sopenharmony_ci}
8728c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(do_secure_storage_access);
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_civoid do_non_secure_storage_access(struct pt_regs *regs)
8758c2ecf20Sopenharmony_ci{
8768c2ecf20Sopenharmony_ci	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
8778c2ecf20Sopenharmony_ci	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	if (get_fault_type(regs) != GMAP_FAULT) {
8808c2ecf20Sopenharmony_ci		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
8818c2ecf20Sopenharmony_ci		WARN_ON_ONCE(1);
8828c2ecf20Sopenharmony_ci		return;
8838c2ecf20Sopenharmony_ci	}
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
8868c2ecf20Sopenharmony_ci		send_sig(SIGSEGV, current, 0);
8878c2ecf20Sopenharmony_ci}
8888c2ecf20Sopenharmony_ciNOKPROBE_SYMBOL(do_non_secure_storage_access);
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_civoid do_secure_storage_violation(struct pt_regs *regs)
8918c2ecf20Sopenharmony_ci{
8928c2ecf20Sopenharmony_ci	/*
8938c2ecf20Sopenharmony_ci	 * Either KVM messed up the secure guest mapping or the same
8948c2ecf20Sopenharmony_ci	 * page is mapped into multiple secure guests.
8958c2ecf20Sopenharmony_ci	 *
8968c2ecf20Sopenharmony_ci	 * This exception is only triggered when a guest 2 is running
8978c2ecf20Sopenharmony_ci	 * and can therefore never occur in kernel context.
8988c2ecf20Sopenharmony_ci	 */
8998c2ecf20Sopenharmony_ci	printk_ratelimited(KERN_WARNING
9008c2ecf20Sopenharmony_ci			   "Secure storage violation in task: %s, pid %d\n",
9018c2ecf20Sopenharmony_ci			   current->comm, current->pid);
9028c2ecf20Sopenharmony_ci	send_sig(SIGSEGV, current, 0);
9038c2ecf20Sopenharmony_ci}
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci#else
/*
 * Variant built when CONFIG_PGSTE is not enabled: the exception is simply
 * forwarded to default_trap_handler().
 */
void do_secure_storage_access(struct pt_regs *regs)
{
	default_trap_handler(regs);
}
9108c2ecf20Sopenharmony_ci
/*
 * Variant built when CONFIG_PGSTE is not enabled: the exception is simply
 * forwarded to default_trap_handler().
 */
void do_non_secure_storage_access(struct pt_regs *regs)
{
	default_trap_handler(regs);
}
9158c2ecf20Sopenharmony_ci
/*
 * Variant built when CONFIG_PGSTE is not enabled: the exception is simply
 * forwarded to default_trap_handler().
 */
void do_secure_storage_violation(struct pt_regs *regs)
{
	default_trap_handler(regs);
}
9208c2ecf20Sopenharmony_ci#endif
921