xref: /kernel/linux/linux-5.10/arch/s390/kvm/kvm-s390.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
8 *               Christian Borntraeger <borntraeger@de.ibm.com>
9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11 *               Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14#define KMSG_COMPONENT "kvm-s390"
15#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/random.h>
28#include <linux/slab.h>
29#include <linux/timer.h>
30#include <linux/vmalloc.h>
31#include <linux/bitmap.h>
32#include <linux/sched/signal.h>
33#include <linux/string.h>
34#include <linux/pgtable.h>
35
36#include <asm/asm-offsets.h>
37#include <asm/lowcore.h>
38#include <asm/stp.h>
39#include <asm/gmap.h>
40#include <asm/nmi.h>
41#include <asm/switch_to.h>
42#include <asm/isc.h>
43#include <asm/sclp.h>
44#include <asm/cpacf.h>
45#include <asm/timex.h>
46#include <asm/ap.h>
47#include <asm/uv.h>
48#include "kvm-s390.h"
49#include "gaccess.h"
50
51#define CREATE_TRACE_POINTS
52#include "trace.h"
53#include "trace-s390.h"
54
55#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56#define LOCAL_IRQS 32
57#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60struct kvm_stats_debugfs_item debugfs_entries[] = {
61	VCPU_STAT("userspace_handled", exit_userspace),
62	VCPU_STAT("exit_null", exit_null),
63	VCPU_STAT("exit_validity", exit_validity),
64	VCPU_STAT("exit_stop_request", exit_stop_request),
65	VCPU_STAT("exit_external_request", exit_external_request),
66	VCPU_STAT("exit_io_request", exit_io_request),
67	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68	VCPU_STAT("exit_instruction", exit_instruction),
69	VCPU_STAT("exit_pei", exit_pei),
70	VCPU_STAT("exit_program_interruption", exit_program_interruption),
71	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72	VCPU_STAT("exit_operation_exception", exit_operation_exception),
73	VCPU_STAT("halt_successful_poll", halt_successful_poll),
74	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77	VCPU_STAT("halt_wakeup", halt_wakeup),
78	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80	VCPU_STAT("instruction_lctlg", instruction_lctlg),
81	VCPU_STAT("instruction_lctl", instruction_lctl),
82	VCPU_STAT("instruction_stctl", instruction_stctl),
83	VCPU_STAT("instruction_stctg", instruction_stctg),
84	VCPU_STAT("deliver_ckc", deliver_ckc),
85	VCPU_STAT("deliver_cputm", deliver_cputm),
86	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87	VCPU_STAT("deliver_external_call", deliver_external_call),
88	VCPU_STAT("deliver_service_signal", deliver_service_signal),
89	VCPU_STAT("deliver_virtio", deliver_virtio),
90	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93	VCPU_STAT("deliver_program", deliver_program),
94	VCPU_STAT("deliver_io", deliver_io),
95	VCPU_STAT("deliver_machine_check", deliver_machine_check),
96	VCPU_STAT("exit_wait_state", exit_wait_state),
97	VCPU_STAT("inject_ckc", inject_ckc),
98	VCPU_STAT("inject_cputm", inject_cputm),
99	VCPU_STAT("inject_external_call", inject_external_call),
100	VM_STAT("inject_float_mchk", inject_float_mchk),
101	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102	VM_STAT("inject_io", inject_io),
103	VCPU_STAT("inject_mchk", inject_mchk),
104	VM_STAT("inject_pfault_done", inject_pfault_done),
105	VCPU_STAT("inject_program", inject_program),
106	VCPU_STAT("inject_restart", inject_restart),
107	VM_STAT("inject_service_signal", inject_service_signal),
108	VCPU_STAT("inject_set_prefix", inject_set_prefix),
109	VCPU_STAT("inject_stop_signal", inject_stop_signal),
110	VCPU_STAT("inject_pfault_init", inject_pfault_init),
111	VM_STAT("inject_virtio", inject_virtio),
112	VCPU_STAT("instruction_epsw", instruction_epsw),
113	VCPU_STAT("instruction_gs", instruction_gs),
114	VCPU_STAT("instruction_io_other", instruction_io_other),
115	VCPU_STAT("instruction_lpsw", instruction_lpsw),
116	VCPU_STAT("instruction_lpswe", instruction_lpswe),
117	VCPU_STAT("instruction_pfmf", instruction_pfmf),
118	VCPU_STAT("instruction_ptff", instruction_ptff),
119	VCPU_STAT("instruction_stidp", instruction_stidp),
120	VCPU_STAT("instruction_sck", instruction_sck),
121	VCPU_STAT("instruction_sckpf", instruction_sckpf),
122	VCPU_STAT("instruction_spx", instruction_spx),
123	VCPU_STAT("instruction_stpx", instruction_stpx),
124	VCPU_STAT("instruction_stap", instruction_stap),
125	VCPU_STAT("instruction_iske", instruction_iske),
126	VCPU_STAT("instruction_ri", instruction_ri),
127	VCPU_STAT("instruction_rrbe", instruction_rrbe),
128	VCPU_STAT("instruction_sske", instruction_sske),
129	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130	VCPU_STAT("instruction_essa", instruction_essa),
131	VCPU_STAT("instruction_stsi", instruction_stsi),
132	VCPU_STAT("instruction_stfl", instruction_stfl),
133	VCPU_STAT("instruction_tb", instruction_tb),
134	VCPU_STAT("instruction_tpi", instruction_tpi),
135	VCPU_STAT("instruction_tprot", instruction_tprot),
136	VCPU_STAT("instruction_tsch", instruction_tsch),
137	VCPU_STAT("instruction_sthyi", instruction_sthyi),
138	VCPU_STAT("instruction_sie", instruction_sie),
139	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155	VCPU_STAT("instruction_diag_10", diagnose_10),
156	VCPU_STAT("instruction_diag_44", diagnose_44),
157	VCPU_STAT("instruction_diag_9c", diagnose_9c),
158	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159	VCPU_STAT("instruction_diag_258", diagnose_258),
160	VCPU_STAT("instruction_diag_308", diagnose_308),
161	VCPU_STAT("instruction_diag_500", diagnose_500),
162	VCPU_STAT("instruction_diag_other", diagnose_other),
163	{ NULL }
164};
165
166struct kvm_s390_tod_clock_ext {
167	__u8 epoch_idx;
168	__u64 tod;
169	__u8 reserved[7];
170} __packed;
171
172/* allow nested virtualization in KVM (if enabled by user space) */
173static int nested;
174module_param(nested, int, S_IRUGO);
175MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177/* allow 1m huge page guest backing, if !nested */
178static int hpage;
179module_param(hpage, int, 0444);
180MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182/* maximum percentage of steal time for polling.  >100 is treated like 100 */
183static u8 halt_poll_max_steal = 10;
184module_param(halt_poll_max_steal, byte, 0644);
185MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187/* if set to true, the GISA will be initialized and used if available */
188static bool use_gisa  = true;
189module_param(use_gisa, bool, 0644);
190MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191
192/*
193 * For now we handle at most 16 double words as this is what the s390 base
194 * kernel handles and stores in the prefix page. If we ever need to go beyond
195 * this, it requires changes to the code, but the external uapi can stay.
196 */
197#define SIZE_INTERNAL 16
198
199/*
200 * Base feature mask that defines the default mask for facilities. It consists
201 * of the defines in FACILITIES_KVM and the non-hypervisor-managed bits.
202 */
203static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204/*
205 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206 * and defines the facilities that can be enabled via a cpu model.
207 */
208static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209
210static unsigned long kvm_s390_fac_size(void)
211{
212	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215		sizeof(S390_lowcore.stfle_fac_list));
216
217	return SIZE_INTERNAL;
218}
219
220/* available cpu features supported by kvm */
221static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222/* available subfunctions indicated via query / "test bit" */
223static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224
225static struct gmap_notifier gmap_notifier;
226static struct gmap_notifier vsie_gmap_notifier;
227debug_info_t *kvm_s390_dbf;
228debug_info_t *kvm_s390_dbf_uv;
229
230/* Section: not file related */
231int kvm_arch_hardware_enable(void)
232{
233	/* every s390 is virtualization enabled ;-) */
234	return 0;
235}
236
237int kvm_arch_check_processor_compat(void *opaque)
238{
239	return 0;
240}
241
242/* forward declarations */
243static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244			      unsigned long end);
245static int sca_switch_to_extended(struct kvm *kvm);
246
247static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248{
249	u8 delta_idx = 0;
250
251	/*
252	 * The TOD jumps by delta; we have to compensate for this by adding
253	 * -delta to the epoch.
254	 */
255	delta = -delta;
256
257	/* sign-extension - we're adding to signed values below */
258	if ((s64)delta < 0)
259		delta_idx = -1;
260
261	scb->epoch += delta;
262	if (scb->ecd & ECD_MEF) {
263		scb->epdx += delta_idx;
264		if (scb->epoch < delta)
265			scb->epdx += 1;
266	}
267}
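/*
 * For illustration: when the multiple-epoch facility is in use (ECD_MEF),
 * the code above performs a sign-extended addition on the combined
 * (epdx:epoch) value, relying on the guest TOD being the host TOD plus the
 * epoch (see kvm_s390_get_tod_clock() below). Assuming the host TOD jumped
 * forward by 2, delta becomes -2 (0xfffffffffffffffe) and delta_idx -1:
 *
 *   epoch 0x...0005 -> 0x...0003:          new epoch < delta, the carry
 *                                          cancels delta_idx, epdx unchanged
 *   epoch 0x...0001 -> 0xffffffffffffffff: no carry, epdx is decremented
 *                                          by one (the borrow)
 */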
268
269/*
270 * This callback is executed during stop_machine(). All CPUs are therefore
271 * temporarily stopped. In order not to change guest behavior, we have to
272 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273 * so a CPU won't be stopped while calculating with the epoch.
274 */
275static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276			  void *v)
277{
278	struct kvm *kvm;
279	struct kvm_vcpu *vcpu;
280	int i;
281	unsigned long long *delta = v;
282
283	list_for_each_entry(kvm, &vm_list, vm_list) {
284		kvm_for_each_vcpu(i, vcpu, kvm) {
285			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286			if (i == 0) {
287				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289			}
290			if (vcpu->arch.cputm_enabled)
291				vcpu->arch.cputm_start += *delta;
292			if (vcpu->arch.vsie_block)
293				kvm_clock_sync_scb(vcpu->arch.vsie_block,
294						   *delta);
295		}
296	}
297	return NOTIFY_OK;
298}
299
300static struct notifier_block kvm_clock_notifier = {
301	.notifier_call = kvm_clock_sync,
302};
303
304int kvm_arch_hardware_setup(void *opaque)
305{
306	gmap_notifier.notifier_call = kvm_gmap_notifier;
307	gmap_register_pte_notifier(&gmap_notifier);
308	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309	gmap_register_pte_notifier(&vsie_gmap_notifier);
310	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311				       &kvm_clock_notifier);
312	return 0;
313}
314
315void kvm_arch_hardware_unsetup(void)
316{
317	gmap_unregister_pte_notifier(&gmap_notifier);
318	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320					 &kvm_clock_notifier);
321}
322
323static void allow_cpu_feat(unsigned long nr)
324{
325	set_bit_inv(nr, kvm_s390_available_cpu_feat);
326}
327
328static inline int plo_test_bit(unsigned char nr)
329{
330	unsigned long function = (unsigned long)nr | 0x100;
331	int cc;
332
333	asm volatile(
334		"	lgr	0,%[function]\n"
335		/* Parameter registers are ignored for "test bit" */
336		"	plo	0,0,0,0(0)\n"
337		"	ipm	%0\n"
338		"	srl	%0,28\n"
339		: "=d" (cc)
340		: [function] "d" (function)
341		: "cc", "0");
342	return cc == 0;
343}
344
345static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346{
347	asm volatile(
348		"	lghi	0,0\n"
349		"	lgr	1,%[query]\n"
350		/* Parameter registers are ignored */
351		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
352		:
353		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354		: "cc", "memory", "0", "1");
355}
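/*
 * Both query helpers above use the calling convention that the inline
 * assembly sets up: general register 0 holds the function code (0x100 turns
 * a PLO function number into its "test bit" form, while the zero loaded for
 * the RRF-format instructions selects the query function), and for
 * __insn32_query() general register 1 holds the address of the buffer that
 * receives the mask of installed subfunctions. plo_test_bit() instead
 * reports its result via the condition code: cc == 0 means the function is
 * installed.
 */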
356
357#define INSN_SORTL 0xb938
358#define INSN_DFLTCC 0xb939
359
360static void kvm_s390_cpu_feat_init(void)
361{
362	int i;
363
364	for (i = 0; i < 256; ++i) {
365		if (plo_test_bit(i))
366			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367	}
368
369	if (test_facility(28)) /* TOD-clock steering */
370		ptff(kvm_s390_available_subfunc.ptff,
371		     sizeof(kvm_s390_available_subfunc.ptff),
372		     PTFF_QAF);
373
374	if (test_facility(17)) { /* MSA */
375		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376			      kvm_s390_available_subfunc.kmac);
377		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378			      kvm_s390_available_subfunc.kmc);
379		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
380			      kvm_s390_available_subfunc.km);
381		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382			      kvm_s390_available_subfunc.kimd);
383		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384			      kvm_s390_available_subfunc.klmd);
385	}
386	if (test_facility(76)) /* MSA3 */
387		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388			      kvm_s390_available_subfunc.pckmo);
389	if (test_facility(77)) { /* MSA4 */
390		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391			      kvm_s390_available_subfunc.kmctr);
392		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393			      kvm_s390_available_subfunc.kmf);
394		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395			      kvm_s390_available_subfunc.kmo);
396		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397			      kvm_s390_available_subfunc.pcc);
398	}
399	if (test_facility(57)) /* MSA5 */
400		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401			      kvm_s390_available_subfunc.ppno);
402
403	if (test_facility(146)) /* MSA8 */
404		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405			      kvm_s390_available_subfunc.kma);
406
407	if (test_facility(155)) /* MSA9 */
408		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409			      kvm_s390_available_subfunc.kdsa);
410
411	if (test_facility(150)) /* SORTL */
412		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413
414	if (test_facility(151)) /* DFLTCC */
415		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416
417	if (MACHINE_HAS_ESOP)
418		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419	/*
420	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422	 */
423	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424	    !test_facility(3) || !nested)
425		return;
426	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427	if (sclp.has_64bscao)
428		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429	if (sclp.has_siif)
430		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431	if (sclp.has_gpere)
432		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433	if (sclp.has_gsls)
434		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435	if (sclp.has_ib)
436		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437	if (sclp.has_cei)
438		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439	if (sclp.has_ibs)
440		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441	if (sclp.has_kss)
442		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443	/*
444	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445	 * all skey handling functions read/set the skey from the PGSTE
446	 * instead of the real storage key.
447	 *
448	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
449	 * pages to be detected as preserved although they are resident.
450	 *
451	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453	 *
454	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457	 *
458	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459	 * cannot easily shadow the SCA because of the ipte lock.
460	 */
461}
462
463int kvm_arch_init(void *opaque)
464{
465	int rc = -ENOMEM;
466
467	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468	if (!kvm_s390_dbf)
469		return -ENOMEM;
470
471	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472	if (!kvm_s390_dbf_uv)
473		goto out;
474
475	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477		goto out;
478
479	kvm_s390_cpu_feat_init();
480
481	/* Register floating interrupt controller interface. */
482	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483	if (rc) {
484		pr_err("A FLIC registration call failed with rc=%d\n", rc);
485		goto out;
486	}
487
488	rc = kvm_s390_gib_init(GAL_ISC);
489	if (rc)
490		goto out;
491
492	return 0;
493
494out:
495	kvm_arch_exit();
496	return rc;
497}
498
499void kvm_arch_exit(void)
500{
501	kvm_s390_gib_destroy();
502	debug_unregister(kvm_s390_dbf);
503	debug_unregister(kvm_s390_dbf_uv);
504}
505
506/* Section: device related */
507long kvm_arch_dev_ioctl(struct file *filp,
508			unsigned int ioctl, unsigned long arg)
509{
510	if (ioctl == KVM_S390_ENABLE_SIE)
511		return s390_enable_sie();
512	return -EINVAL;
513}
514
515int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516{
517	int r;
518
519	switch (ext) {
520	case KVM_CAP_S390_PSW:
521	case KVM_CAP_S390_GMAP:
522	case KVM_CAP_SYNC_MMU:
523#ifdef CONFIG_KVM_S390_UCONTROL
524	case KVM_CAP_S390_UCONTROL:
525#endif
526	case KVM_CAP_ASYNC_PF:
527	case KVM_CAP_SYNC_REGS:
528	case KVM_CAP_ONE_REG:
529	case KVM_CAP_ENABLE_CAP:
530	case KVM_CAP_S390_CSS_SUPPORT:
531	case KVM_CAP_IOEVENTFD:
532	case KVM_CAP_DEVICE_CTRL:
533	case KVM_CAP_S390_IRQCHIP:
534	case KVM_CAP_VM_ATTRIBUTES:
535	case KVM_CAP_MP_STATE:
536	case KVM_CAP_IMMEDIATE_EXIT:
537	case KVM_CAP_S390_INJECT_IRQ:
538	case KVM_CAP_S390_USER_SIGP:
539	case KVM_CAP_S390_USER_STSI:
540	case KVM_CAP_S390_SKEYS:
541	case KVM_CAP_S390_IRQ_STATE:
542	case KVM_CAP_S390_USER_INSTR0:
543	case KVM_CAP_S390_CMMA_MIGRATION:
544	case KVM_CAP_S390_AIS:
545	case KVM_CAP_S390_AIS_MIGRATION:
546	case KVM_CAP_S390_VCPU_RESETS:
547	case KVM_CAP_SET_GUEST_DEBUG:
548	case KVM_CAP_S390_DIAG318:
549		r = 1;
550		break;
551	case KVM_CAP_S390_HPAGE_1M:
552		r = 0;
553		if (hpage && !kvm_is_ucontrol(kvm))
554			r = 1;
555		break;
556	case KVM_CAP_S390_MEM_OP:
557		r = MEM_OP_MAX_SIZE;
558		break;
559	case KVM_CAP_NR_VCPUS:
560	case KVM_CAP_MAX_VCPUS:
561	case KVM_CAP_MAX_VCPU_ID:
562		r = KVM_S390_BSCA_CPU_SLOTS;
563		if (!kvm_s390_use_sca_entries())
564			r = KVM_MAX_VCPUS;
565		else if (sclp.has_esca && sclp.has_64bscao)
566			r = KVM_S390_ESCA_CPU_SLOTS;
567		break;
568	case KVM_CAP_S390_COW:
569		r = MACHINE_HAS_ESOP;
570		break;
571	case KVM_CAP_S390_VECTOR_REGISTERS:
572		r = MACHINE_HAS_VX;
573		break;
574	case KVM_CAP_S390_RI:
575		r = test_facility(64);
576		break;
577	case KVM_CAP_S390_GS:
578		r = test_facility(133);
579		break;
580	case KVM_CAP_S390_BPB:
581		r = test_facility(82);
582		break;
583	case KVM_CAP_S390_PROTECTED:
584		r = is_prot_virt_host();
585		break;
586	default:
587		r = 0;
588	}
589	return r;
590}
591
592void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593{
594	int i;
595	gfn_t cur_gfn, last_gfn;
596	unsigned long gaddr, vmaddr;
597	struct gmap *gmap = kvm->arch.gmap;
598	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599
600	/* Loop over all guest segments */
601	cur_gfn = memslot->base_gfn;
602	last_gfn = memslot->base_gfn + memslot->npages;
603	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604		gaddr = gfn_to_gpa(cur_gfn);
605		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606		if (kvm_is_error_hva(vmaddr))
607			continue;
608
609		bitmap_zero(bitmap, _PAGE_ENTRIES);
610		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611		for (i = 0; i < _PAGE_ENTRIES; i++) {
612			if (test_bit(i, bitmap))
613				mark_page_dirty(kvm, cur_gfn + i);
614		}
615
616		if (fatal_signal_pending(current))
617			return;
618		cond_resched();
619	}
620}
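/*
 * The walk above proceeds one segment at a time: each iteration covers
 * _PAGE_ENTRIES guest pages (one page table worth, i.e. a 1 MB segment),
 * lets gmap_sync_dirty_log_pmd() transfer the dirty state for that range
 * from the gmap into a local bitmap, and forwards every set bit to the
 * generic dirty log via mark_page_dirty(). The loop bails out early when a
 * fatal signal is pending and reschedules via cond_resched() to keep large
 * memslots from hogging the CPU.
 */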
621
622/* Section: vm related */
623static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625/*
626 * Get (and clear) the dirty memory log for a memory slot.
627 */
628int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629			       struct kvm_dirty_log *log)
630{
631	int r;
632	unsigned long n;
633	struct kvm_memory_slot *memslot;
634	int is_dirty;
635
636	if (kvm_is_ucontrol(kvm))
637		return -EINVAL;
638
639	mutex_lock(&kvm->slots_lock);
640
641	r = -EINVAL;
642	if (log->slot >= KVM_USER_MEM_SLOTS)
643		goto out;
644
645	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646	if (r)
647		goto out;
648
649	/* Clear the dirty log */
650	if (is_dirty) {
651		n = kvm_dirty_bitmap_bytes(memslot);
652		memset(memslot->dirty_bitmap, 0, n);
653	}
654	r = 0;
655out:
656	mutex_unlock(&kvm->slots_lock);
657	return r;
658}
659
660static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661{
662	unsigned int i;
663	struct kvm_vcpu *vcpu;
664
665	kvm_for_each_vcpu(i, vcpu, kvm) {
666		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667	}
668}
669
670int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671{
672	int r;
673
674	if (cap->flags)
675		return -EINVAL;
676
677	switch (cap->cap) {
678	case KVM_CAP_S390_IRQCHIP:
679		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680		kvm->arch.use_irqchip = 1;
681		r = 0;
682		break;
683	case KVM_CAP_S390_USER_SIGP:
684		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685		kvm->arch.user_sigp = 1;
686		r = 0;
687		break;
688	case KVM_CAP_S390_VECTOR_REGISTERS:
689		mutex_lock(&kvm->lock);
690		if (kvm->created_vcpus) {
691			r = -EBUSY;
692		} else if (MACHINE_HAS_VX) {
693			set_kvm_facility(kvm->arch.model.fac_mask, 129);
694			set_kvm_facility(kvm->arch.model.fac_list, 129);
695			if (test_facility(134)) {
696				set_kvm_facility(kvm->arch.model.fac_mask, 134);
697				set_kvm_facility(kvm->arch.model.fac_list, 134);
698			}
699			if (test_facility(135)) {
700				set_kvm_facility(kvm->arch.model.fac_mask, 135);
701				set_kvm_facility(kvm->arch.model.fac_list, 135);
702			}
703			if (test_facility(148)) {
704				set_kvm_facility(kvm->arch.model.fac_mask, 148);
705				set_kvm_facility(kvm->arch.model.fac_list, 148);
706			}
707			if (test_facility(152)) {
708				set_kvm_facility(kvm->arch.model.fac_mask, 152);
709				set_kvm_facility(kvm->arch.model.fac_list, 152);
710			}
711			r = 0;
712		} else
713			r = -EINVAL;
714		mutex_unlock(&kvm->lock);
715		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716			 r ? "(not available)" : "(success)");
717		break;
718	case KVM_CAP_S390_RI:
719		r = -EINVAL;
720		mutex_lock(&kvm->lock);
721		if (kvm->created_vcpus) {
722			r = -EBUSY;
723		} else if (test_facility(64)) {
724			set_kvm_facility(kvm->arch.model.fac_mask, 64);
725			set_kvm_facility(kvm->arch.model.fac_list, 64);
726			r = 0;
727		}
728		mutex_unlock(&kvm->lock);
729		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730			 r ? "(not available)" : "(success)");
731		break;
732	case KVM_CAP_S390_AIS:
733		mutex_lock(&kvm->lock);
734		if (kvm->created_vcpus) {
735			r = -EBUSY;
736		} else {
737			set_kvm_facility(kvm->arch.model.fac_mask, 72);
738			set_kvm_facility(kvm->arch.model.fac_list, 72);
739			r = 0;
740		}
741		mutex_unlock(&kvm->lock);
742		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743			 r ? "(not available)" : "(success)");
744		break;
745	case KVM_CAP_S390_GS:
746		r = -EINVAL;
747		mutex_lock(&kvm->lock);
748		if (kvm->created_vcpus) {
749			r = -EBUSY;
750		} else if (test_facility(133)) {
751			set_kvm_facility(kvm->arch.model.fac_mask, 133);
752			set_kvm_facility(kvm->arch.model.fac_list, 133);
753			r = 0;
754		}
755		mutex_unlock(&kvm->lock);
756		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757			 r ? "(not available)" : "(success)");
758		break;
759	case KVM_CAP_S390_HPAGE_1M:
760		mutex_lock(&kvm->lock);
761		if (kvm->created_vcpus)
762			r = -EBUSY;
763		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764			r = -EINVAL;
765		else {
766			r = 0;
767			mmap_write_lock(kvm->mm);
768			kvm->mm->context.allow_gmap_hpage_1m = 1;
769			mmap_write_unlock(kvm->mm);
770			/*
771			 * We might have to create fake 4k page
772			 * tables. To keep the hardware from working on
773			 * stale PGSTEs, we emulate these instructions.
774			 */
775			kvm->arch.use_skf = 0;
776			kvm->arch.use_pfmfi = 0;
777		}
778		mutex_unlock(&kvm->lock);
779		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780			 r ? "(not available)" : "(success)");
781		break;
782	case KVM_CAP_S390_USER_STSI:
783		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784		kvm->arch.user_stsi = 1;
785		r = 0;
786		break;
787	case KVM_CAP_S390_USER_INSTR0:
788		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789		kvm->arch.user_instr0 = 1;
790		icpt_operexc_on_all_vcpus(kvm);
791		r = 0;
792		break;
793	default:
794		r = -EINVAL;
795		break;
796	}
797	return r;
798}
799
800static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801{
802	int ret;
803
804	switch (attr->attr) {
805	case KVM_S390_VM_MEM_LIMIT_SIZE:
806		ret = 0;
807		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808			 kvm->arch.mem_limit);
809		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810			ret = -EFAULT;
811		break;
812	default:
813		ret = -ENXIO;
814		break;
815	}
816	return ret;
817}
818
819static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820{
821	int ret;
822	unsigned int idx;
823	switch (attr->attr) {
824	case KVM_S390_VM_MEM_ENABLE_CMMA:
825		ret = -ENXIO;
826		if (!sclp.has_cmma)
827			break;
828
829		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830		mutex_lock(&kvm->lock);
831		if (kvm->created_vcpus)
832			ret = -EBUSY;
833		else if (kvm->mm->context.allow_gmap_hpage_1m)
834			ret = -EINVAL;
835		else {
836			kvm->arch.use_cmma = 1;
837			/* Not compatible with cmma. */
838			kvm->arch.use_pfmfi = 0;
839			ret = 0;
840		}
841		mutex_unlock(&kvm->lock);
842		break;
843	case KVM_S390_VM_MEM_CLR_CMMA:
844		ret = -ENXIO;
845		if (!sclp.has_cmma)
846			break;
847		ret = -EINVAL;
848		if (!kvm->arch.use_cmma)
849			break;
850
851		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852		mutex_lock(&kvm->lock);
853		idx = srcu_read_lock(&kvm->srcu);
854		s390_reset_cmma(kvm->arch.gmap->mm);
855		srcu_read_unlock(&kvm->srcu, idx);
856		mutex_unlock(&kvm->lock);
857		ret = 0;
858		break;
859	case KVM_S390_VM_MEM_LIMIT_SIZE: {
860		unsigned long new_limit;
861
862		if (kvm_is_ucontrol(kvm))
863			return -EINVAL;
864
865		if (get_user(new_limit, (u64 __user *)attr->addr))
866			return -EFAULT;
867
868		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869		    new_limit > kvm->arch.mem_limit)
870			return -E2BIG;
871
872		if (!new_limit)
873			return -EINVAL;
874
875		/* gmap_create takes last usable address */
876		if (new_limit != KVM_S390_NO_MEM_LIMIT)
877			new_limit -= 1;
878
879		ret = -EBUSY;
880		mutex_lock(&kvm->lock);
881		if (!kvm->created_vcpus) {
882			/* gmap_create will round the limit up */
883			struct gmap *new = gmap_create(current->mm, new_limit);
884
885			if (!new) {
886				ret = -ENOMEM;
887			} else {
888				gmap_remove(kvm->arch.gmap);
889				new->private = kvm;
890				kvm->arch.gmap = new;
891				ret = 0;
892			}
893		}
894		mutex_unlock(&kvm->lock);
895		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897			 (void *) kvm->arch.gmap->asce);
898		break;
899	}
900	default:
901		ret = -ENXIO;
902		break;
903	}
904	return ret;
905}
906
907static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910{
911	struct kvm_vcpu *vcpu;
912	int i;
913
914	kvm_s390_vcpu_block_all(kvm);
915
916	kvm_for_each_vcpu(i, vcpu, kvm) {
917		kvm_s390_vcpu_crypto_setup(vcpu);
918		/* recreate the shadow crycb by leaving the VSIE handler */
919		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920	}
921
922	kvm_s390_vcpu_unblock_all(kvm);
923}
924
925static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926{
927	mutex_lock(&kvm->lock);
928	switch (attr->attr) {
929	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930		if (!test_kvm_facility(kvm, 76)) {
931			mutex_unlock(&kvm->lock);
932			return -EINVAL;
933		}
934		get_random_bytes(
935			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937		kvm->arch.crypto.aes_kw = 1;
938		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939		break;
940	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941		if (!test_kvm_facility(kvm, 76)) {
942			mutex_unlock(&kvm->lock);
943			return -EINVAL;
944		}
945		get_random_bytes(
946			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948		kvm->arch.crypto.dea_kw = 1;
949		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950		break;
951	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952		if (!test_kvm_facility(kvm, 76)) {
953			mutex_unlock(&kvm->lock);
954			return -EINVAL;
955		}
956		kvm->arch.crypto.aes_kw = 0;
957		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960		break;
961	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962		if (!test_kvm_facility(kvm, 76)) {
963			mutex_unlock(&kvm->lock);
964			return -EINVAL;
965		}
966		kvm->arch.crypto.dea_kw = 0;
967		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970		break;
971	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972		if (!ap_instructions_available()) {
973			mutex_unlock(&kvm->lock);
974			return -EOPNOTSUPP;
975		}
976		kvm->arch.crypto.apie = 1;
977		break;
978	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979		if (!ap_instructions_available()) {
980			mutex_unlock(&kvm->lock);
981			return -EOPNOTSUPP;
982		}
983		kvm->arch.crypto.apie = 0;
984		break;
985	default:
986		mutex_unlock(&kvm->lock);
987		return -ENXIO;
988	}
989
990	kvm_s390_vcpu_crypto_reset_all(kvm);
991	mutex_unlock(&kvm->lock);
992	return 0;
993}
994
995static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996{
997	int cx;
998	struct kvm_vcpu *vcpu;
999
1000	kvm_for_each_vcpu(cx, vcpu, kvm)
1001		kvm_s390_sync_request(req, vcpu);
1002}
1003
1004/*
1005 * Must be called with kvm->srcu held to avoid races on memslots, and with
1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007 */
1008static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009{
1010	struct kvm_memory_slot *ms;
1011	struct kvm_memslots *slots;
1012	unsigned long ram_pages = 0;
1013	int slotnr;
1014
1015	/* migration mode already enabled */
1016	if (kvm->arch.migration_mode)
1017		return 0;
1018	slots = kvm_memslots(kvm);
1019	if (!slots || !slots->used_slots)
1020		return -EINVAL;
1021
1022	if (!kvm->arch.use_cmma) {
1023		kvm->arch.migration_mode = 1;
1024		return 0;
1025	}
1026	/* mark all the pages in active slots as dirty */
1027	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028		ms = slots->memslots + slotnr;
1029		if (!ms->dirty_bitmap)
1030			return -EINVAL;
1031		/*
1032		 * The second half of the bitmap is only used on x86,
1033		 * and would be wasted otherwise, so we put it to good
1034		 * use here to keep track of the state of the storage
1035		 * attributes.
1036		 */
1037		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038		ram_pages += ms->npages;
1039	}
1040	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041	kvm->arch.migration_mode = 1;
1042	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043	return 0;
1044}
1045
1046/*
1047 * Must be called with kvm->slots_lock to avoid races with ourselves and
1048 * kvm_s390_vm_start_migration.
1049 */
1050static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051{
1052	/* migration mode already disabled */
1053	if (!kvm->arch.migration_mode)
1054		return 0;
1055	kvm->arch.migration_mode = 0;
1056	if (kvm->arch.use_cmma)
1057		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058	return 0;
1059}
1060
1061static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062				     struct kvm_device_attr *attr)
1063{
1064	int res = -ENXIO;
1065
1066	mutex_lock(&kvm->slots_lock);
1067	switch (attr->attr) {
1068	case KVM_S390_VM_MIGRATION_START:
1069		res = kvm_s390_vm_start_migration(kvm);
1070		break;
1071	case KVM_S390_VM_MIGRATION_STOP:
1072		res = kvm_s390_vm_stop_migration(kvm);
1073		break;
1074	default:
1075		break;
1076	}
1077	mutex_unlock(&kvm->slots_lock);
1078
1079	return res;
1080}
1081
1082static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083				     struct kvm_device_attr *attr)
1084{
1085	u64 mig = kvm->arch.migration_mode;
1086
1087	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088		return -ENXIO;
1089
1090	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091		return -EFAULT;
1092	return 0;
1093}
1094
1095static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1096
1097static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098{
1099	struct kvm_s390_vm_tod_clock gtod;
1100
1101	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1102		return -EFAULT;
1103
1104	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1105		return -EINVAL;
1106	__kvm_s390_set_tod_clock(kvm, &gtod);
1107
1108	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1109		gtod.epoch_idx, gtod.tod);
1110
1111	return 0;
1112}
1113
1114static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115{
1116	u8 gtod_high;
1117
1118	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1119					   sizeof(gtod_high)))
1120		return -EFAULT;
1121
1122	if (gtod_high != 0)
1123		return -EINVAL;
1124	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125
1126	return 0;
1127}
1128
1129static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130{
1131	struct kvm_s390_vm_tod_clock gtod = { 0 };
1132
1133	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1134			   sizeof(gtod.tod)))
1135		return -EFAULT;
1136
1137	__kvm_s390_set_tod_clock(kvm, &gtod);
1138	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1139	return 0;
1140}
1141
1142static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143{
1144	int ret;
1145
1146	if (attr->flags)
1147		return -EINVAL;
1148
1149	mutex_lock(&kvm->lock);
1150	/*
1151	 * For protected guests, the TOD is managed by the ultravisor, so trying
1152	 * to change it will never bring the expected results.
1153	 */
1154	if (kvm_s390_pv_is_protected(kvm)) {
1155		ret = -EOPNOTSUPP;
1156		goto out_unlock;
1157	}
1158
1159	switch (attr->attr) {
1160	case KVM_S390_VM_TOD_EXT:
1161		ret = kvm_s390_set_tod_ext(kvm, attr);
1162		break;
1163	case KVM_S390_VM_TOD_HIGH:
1164		ret = kvm_s390_set_tod_high(kvm, attr);
1165		break;
1166	case KVM_S390_VM_TOD_LOW:
1167		ret = kvm_s390_set_tod_low(kvm, attr);
1168		break;
1169	default:
1170		ret = -ENXIO;
1171		break;
1172	}
1173
1174out_unlock:
1175	mutex_unlock(&kvm->lock);
1176	return ret;
1177}
1178
1179static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180				   struct kvm_s390_vm_tod_clock *gtod)
1181{
1182	struct kvm_s390_tod_clock_ext htod;
1183
1184	preempt_disable();
1185
1186	get_tod_clock_ext((char *)&htod);
1187
1188	gtod->tod = htod.tod + kvm->arch.epoch;
1189	gtod->epoch_idx = 0;
1190	if (test_kvm_facility(kvm, 139)) {
1191		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1192		if (gtod->tod < htod.tod)
1193			gtod->epoch_idx += 1;
1194	}
1195
1196	preempt_enable();
1197}
1198
1199static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200{
1201	struct kvm_s390_vm_tod_clock gtod;
1202
1203	memset(&gtod, 0, sizeof(gtod));
1204	kvm_s390_get_tod_clock(kvm, &gtod);
1205	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206		return -EFAULT;
1207
1208	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209		gtod.epoch_idx, gtod.tod);
1210	return 0;
1211}
1212
1213static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214{
1215	u8 gtod_high = 0;
1216
1217	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218					 sizeof(gtod_high)))
1219		return -EFAULT;
1220	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221
1222	return 0;
1223}
1224
1225static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226{
1227	u64 gtod;
1228
1229	gtod = kvm_s390_get_tod_clock_fast(kvm);
1230	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231		return -EFAULT;
1232	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233
1234	return 0;
1235}
1236
1237static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238{
1239	int ret;
1240
1241	if (attr->flags)
1242		return -EINVAL;
1243
1244	switch (attr->attr) {
1245	case KVM_S390_VM_TOD_EXT:
1246		ret = kvm_s390_get_tod_ext(kvm, attr);
1247		break;
1248	case KVM_S390_VM_TOD_HIGH:
1249		ret = kvm_s390_get_tod_high(kvm, attr);
1250		break;
1251	case KVM_S390_VM_TOD_LOW:
1252		ret = kvm_s390_get_tod_low(kvm, attr);
1253		break;
1254	default:
1255		ret = -ENXIO;
1256		break;
1257	}
1258	return ret;
1259}
1260
1261static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262{
1263	struct kvm_s390_vm_cpu_processor *proc;
1264	u16 lowest_ibc, unblocked_ibc;
1265	int ret = 0;
1266
1267	mutex_lock(&kvm->lock);
1268	if (kvm->created_vcpus) {
1269		ret = -EBUSY;
1270		goto out;
1271	}
1272	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273	if (!proc) {
1274		ret = -ENOMEM;
1275		goto out;
1276	}
1277	if (!copy_from_user(proc, (void __user *)attr->addr,
1278			    sizeof(*proc))) {
1279		kvm->arch.model.cpuid = proc->cpuid;
1280		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281		unblocked_ibc = sclp.ibc & 0xfff;
1282		if (lowest_ibc && proc->ibc) {
1283			if (proc->ibc > unblocked_ibc)
1284				kvm->arch.model.ibc = unblocked_ibc;
1285			else if (proc->ibc < lowest_ibc)
1286				kvm->arch.model.ibc = lowest_ibc;
1287			else
1288				kvm->arch.model.ibc = proc->ibc;
1289		}
1290		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1292		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293			 kvm->arch.model.ibc,
1294			 kvm->arch.model.cpuid);
1295		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296			 kvm->arch.model.fac_list[0],
1297			 kvm->arch.model.fac_list[1],
1298			 kvm->arch.model.fac_list[2]);
1299	} else
1300		ret = -EFAULT;
1301	kfree(proc);
1302out:
1303	mutex_unlock(&kvm->lock);
1304	return ret;
1305}
1306
1307static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308				       struct kvm_device_attr *attr)
1309{
1310	struct kvm_s390_vm_cpu_feat data;
1311
1312	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313		return -EFAULT;
1314	if (!bitmap_subset((unsigned long *) data.feat,
1315			   kvm_s390_available_cpu_feat,
1316			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1317		return -EINVAL;
1318
1319	mutex_lock(&kvm->lock);
1320	if (kvm->created_vcpus) {
1321		mutex_unlock(&kvm->lock);
1322		return -EBUSY;
1323	}
1324	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1326	mutex_unlock(&kvm->lock);
1327	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328			 data.feat[0],
1329			 data.feat[1],
1330			 data.feat[2]);
1331	return 0;
1332}
1333
1334static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335					  struct kvm_device_attr *attr)
1336{
1337	mutex_lock(&kvm->lock);
1338	if (kvm->created_vcpus) {
1339		mutex_unlock(&kvm->lock);
1340		return -EBUSY;
1341	}
1342
1343	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345		mutex_unlock(&kvm->lock);
1346		return -EFAULT;
1347	}
1348	mutex_unlock(&kvm->lock);
1349
1350	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1356		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1359		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1362		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1365		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1368		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1371		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1374		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1377		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1380		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1383		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1386		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1389		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1392		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1395		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407
1408	return 0;
1409}
1410
1411static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412{
1413	int ret = -ENXIO;
1414
1415	switch (attr->attr) {
1416	case KVM_S390_VM_CPU_PROCESSOR:
1417		ret = kvm_s390_set_processor(kvm, attr);
1418		break;
1419	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420		ret = kvm_s390_set_processor_feat(kvm, attr);
1421		break;
1422	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424		break;
1425	}
1426	return ret;
1427}
1428
1429static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430{
1431	struct kvm_s390_vm_cpu_processor *proc;
1432	int ret = 0;
1433
1434	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435	if (!proc) {
1436		ret = -ENOMEM;
1437		goto out;
1438	}
1439	proc->cpuid = kvm->arch.model.cpuid;
1440	proc->ibc = kvm->arch.model.ibc;
1441	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1443	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444		 kvm->arch.model.ibc,
1445		 kvm->arch.model.cpuid);
1446	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447		 kvm->arch.model.fac_list[0],
1448		 kvm->arch.model.fac_list[1],
1449		 kvm->arch.model.fac_list[2]);
1450	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451		ret = -EFAULT;
1452	kfree(proc);
1453out:
1454	return ret;
1455}
1456
1457static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458{
1459	struct kvm_s390_vm_cpu_machine *mach;
1460	int ret = 0;
1461
1462	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463	if (!mach) {
1464		ret = -ENOMEM;
1465		goto out;
1466	}
1467	get_cpu_id((struct cpuid *) &mach->cpuid);
1468	mach->ibc = sclp.ibc;
1469	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1471	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472	       sizeof(S390_lowcore.stfle_fac_list));
1473	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1474		 kvm->arch.model.ibc,
1475		 kvm->arch.model.cpuid);
1476	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1477		 mach->fac_mask[0],
1478		 mach->fac_mask[1],
1479		 mach->fac_mask[2]);
1480	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1481		 mach->fac_list[0],
1482		 mach->fac_list[1],
1483		 mach->fac_list[2]);
1484	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485		ret = -EFAULT;
1486	kfree(mach);
1487out:
1488	return ret;
1489}
1490
1491static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492				       struct kvm_device_attr *attr)
1493{
1494	struct kvm_s390_vm_cpu_feat data;
1495
1496	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1498	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499		return -EFAULT;
1500	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501			 data.feat[0],
1502			 data.feat[1],
1503			 data.feat[2]);
1504	return 0;
1505}
1506
1507static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508				     struct kvm_device_attr *attr)
1509{
1510	struct kvm_s390_vm_cpu_feat data;
1511
1512	bitmap_copy((unsigned long *) data.feat,
1513		    kvm_s390_available_cpu_feat,
1514		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1515	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516		return -EFAULT;
1517	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518			 data.feat[0],
1519			 data.feat[1],
1520			 data.feat[2]);
1521	return 0;
1522}
1523
1524static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525					  struct kvm_device_attr *attr)
1526{
1527	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529		return -EFAULT;
1530
1531	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1537		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1540		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1543		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1546		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1549		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1552		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1555		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1558		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1561		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1564		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1567		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1570		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1573		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1576		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588
1589	return 0;
1590}
1591
1592static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593					struct kvm_device_attr *attr)
1594{
1595	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597		return -EFAULT;
1598
1599	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1605		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1608		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1611		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1614		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1617		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1620		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1623		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1626		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1629		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1632		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1635		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1638		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1641		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1644		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656
1657	return 0;
1658}
1659
1660static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661{
1662	int ret = -ENXIO;
1663
1664	switch (attr->attr) {
1665	case KVM_S390_VM_CPU_PROCESSOR:
1666		ret = kvm_s390_get_processor(kvm, attr);
1667		break;
1668	case KVM_S390_VM_CPU_MACHINE:
1669		ret = kvm_s390_get_machine(kvm, attr);
1670		break;
1671	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672		ret = kvm_s390_get_processor_feat(kvm, attr);
1673		break;
1674	case KVM_S390_VM_CPU_MACHINE_FEAT:
1675		ret = kvm_s390_get_machine_feat(kvm, attr);
1676		break;
1677	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679		break;
1680	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682		break;
1683	}
1684	return ret;
1685}
1686
1687static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688{
1689	int ret;
1690
1691	switch (attr->group) {
1692	case KVM_S390_VM_MEM_CTRL:
1693		ret = kvm_s390_set_mem_control(kvm, attr);
1694		break;
1695	case KVM_S390_VM_TOD:
1696		ret = kvm_s390_set_tod(kvm, attr);
1697		break;
1698	case KVM_S390_VM_CPU_MODEL:
1699		ret = kvm_s390_set_cpu_model(kvm, attr);
1700		break;
1701	case KVM_S390_VM_CRYPTO:
1702		ret = kvm_s390_vm_set_crypto(kvm, attr);
1703		break;
1704	case KVM_S390_VM_MIGRATION:
1705		ret = kvm_s390_vm_set_migration(kvm, attr);
1706		break;
1707	default:
1708		ret = -ENXIO;
1709		break;
1710	}
1711
1712	return ret;
1713}
1714
1715static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716{
1717	int ret;
1718
1719	switch (attr->group) {
1720	case KVM_S390_VM_MEM_CTRL:
1721		ret = kvm_s390_get_mem_control(kvm, attr);
1722		break;
1723	case KVM_S390_VM_TOD:
1724		ret = kvm_s390_get_tod(kvm, attr);
1725		break;
1726	case KVM_S390_VM_CPU_MODEL:
1727		ret = kvm_s390_get_cpu_model(kvm, attr);
1728		break;
1729	case KVM_S390_VM_MIGRATION:
1730		ret = kvm_s390_vm_get_migration(kvm, attr);
1731		break;
1732	default:
1733		ret = -ENXIO;
1734		break;
1735	}
1736
1737	return ret;
1738}
1739
1740static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741{
1742	int ret;
1743
1744	switch (attr->group) {
1745	case KVM_S390_VM_MEM_CTRL:
1746		switch (attr->attr) {
1747		case KVM_S390_VM_MEM_ENABLE_CMMA:
1748		case KVM_S390_VM_MEM_CLR_CMMA:
1749			ret = sclp.has_cmma ? 0 : -ENXIO;
1750			break;
1751		case KVM_S390_VM_MEM_LIMIT_SIZE:
1752			ret = 0;
1753			break;
1754		default:
1755			ret = -ENXIO;
1756			break;
1757		}
1758		break;
1759	case KVM_S390_VM_TOD:
1760		switch (attr->attr) {
1761		case KVM_S390_VM_TOD_LOW:
1762		case KVM_S390_VM_TOD_HIGH:
1763			ret = 0;
1764			break;
1765		default:
1766			ret = -ENXIO;
1767			break;
1768		}
1769		break;
1770	case KVM_S390_VM_CPU_MODEL:
1771		switch (attr->attr) {
1772		case KVM_S390_VM_CPU_PROCESSOR:
1773		case KVM_S390_VM_CPU_MACHINE:
1774		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775		case KVM_S390_VM_CPU_MACHINE_FEAT:
1776		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778			ret = 0;
1779			break;
1780		default:
1781			ret = -ENXIO;
1782			break;
1783		}
1784		break;
1785	case KVM_S390_VM_CRYPTO:
1786		switch (attr->attr) {
1787		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791			ret = 0;
1792			break;
1793		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795			ret = ap_instructions_available() ? 0 : -ENXIO;
1796			break;
1797		default:
1798			ret = -ENXIO;
1799			break;
1800		}
1801		break;
1802	case KVM_S390_VM_MIGRATION:
1803		ret = 0;
1804		break;
1805	default:
1806		ret = -ENXIO;
1807		break;
1808	}
1809
1810	return ret;
1811}
1812
1813static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814{
1815	uint8_t *keys;
1816	uint64_t hva;
1817	int srcu_idx, i, r = 0;
1818
1819	if (args->flags != 0)
1820		return -EINVAL;
1821
1822	/* Is this guest using storage keys? */
1823	if (!mm_uses_skeys(current->mm))
1824		return KVM_S390_GET_SKEYS_NONE;
1825
1826	/* Enforce sane limit on memory allocation */
1827	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828		return -EINVAL;
1829
1830	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831	if (!keys)
1832		return -ENOMEM;
1833
1834	mmap_read_lock(current->mm);
1835	srcu_idx = srcu_read_lock(&kvm->srcu);
1836	for (i = 0; i < args->count; i++) {
1837		hva = gfn_to_hva(kvm, args->start_gfn + i);
1838		if (kvm_is_error_hva(hva)) {
1839			r = -EFAULT;
1840			break;
1841		}
1842
1843		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844		if (r)
1845			break;
1846	}
1847	srcu_read_unlock(&kvm->srcu, srcu_idx);
1848	mmap_read_unlock(current->mm);
1849
1850	if (!r) {
1851		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852				 sizeof(uint8_t) * args->count);
1853		if (r)
1854			r = -EFAULT;
1855	}
1856
1857	kvfree(keys);
1858	return r;
1859}
1860
1861static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862{
1863	uint8_t *keys;
1864	uint64_t hva;
1865	int srcu_idx, i, r = 0;
1866	bool unlocked;
1867
1868	if (args->flags != 0)
1869		return -EINVAL;
1870
1871	/* Enforce sane limit on memory allocation */
1872	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873		return -EINVAL;
1874
1875	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876	if (!keys)
1877		return -ENOMEM;
1878
1879	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880			   sizeof(uint8_t) * args->count);
1881	if (r) {
1882		r = -EFAULT;
1883		goto out;
1884	}
1885
1886	/* Enable storage key handling for the guest */
1887	r = s390_enable_skey();
1888	if (r)
1889		goto out;
1890
1891	i = 0;
1892	mmap_read_lock(current->mm);
1893	srcu_idx = srcu_read_lock(&kvm->srcu);
1894	while (i < args->count) {
1895		unlocked = false;
1896		hva = gfn_to_hva(kvm, args->start_gfn + i);
1897		if (kvm_is_error_hva(hva)) {
1898			r = -EFAULT;
1899			break;
1900		}
1901
1902		/* Lowest order bit is reserved */
1903		if (keys[i] & 0x01) {
1904			r = -EINVAL;
1905			break;
1906		}
1907
1908		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909		if (r) {
1910			r = fixup_user_fault(current->mm, hva,
1911					     FAULT_FLAG_WRITE, &unlocked);
1912			if (r)
1913				break;
1914		}
1915		if (!r)
1916			i++;
1917	}
1918	srcu_read_unlock(&kvm->srcu, srcu_idx);
1919	mmap_read_unlock(current->mm);
1920out:
1921	kvfree(keys);
1922	return r;
1923}
1924
1925/*
1926 * Base address and length must be sent at the start of each block; therefore
1927 * it's cheaper to send some clean data, as long as it's less than the size of
1928 * two longs.
1929 */
1930#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931/* for consistency */
1932#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1933
1934/*
1935 * Similar to gfn_to_memslot, but returns the index of a memslot even when the
1936 * address falls in a hole. In that case the index of one of the memslots
1937 * bordering the hole is returned.
1938 */
1939static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940{
1941	int start = 0, end = slots->used_slots;
1942	int slot = atomic_read(&slots->lru_slot);
1943	struct kvm_memory_slot *memslots = slots->memslots;
1944
1945	if (gfn >= memslots[slot].base_gfn &&
1946	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947		return slot;
1948
1949	while (start < end) {
1950		slot = start + (end - start) / 2;
1951
1952		if (gfn >= memslots[slot].base_gfn)
1953			end = slot;
1954		else
1955			start = slot + 1;
1956	}
1957
1958	if (start >= slots->used_slots)
1959		return slots->used_slots - 1;
1960
1961	if (gfn >= memslots[start].base_gfn &&
1962	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1963		atomic_set(&slots->lru_slot, start);
1964	}
1965
1966	return start;
1967}
1968
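/*
 * Peek mode: read the CMMA page state of up to bufsize consecutive guest
 * frames starting at args->start_gfn, without touching the dirty bitmap.
 */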
1969static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970			      u8 *res, unsigned long bufsize)
1971{
1972	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973
1974	args->count = 0;
1975	while (args->count < bufsize) {
1976		hva = gfn_to_hva(kvm, cur_gfn);
1977		/*
1978		 * We return an error if the first value was invalid, but we
1979		 * return successfully if at least one value was copied.
1980		 */
1981		if (kvm_is_error_hva(hva))
1982			return args->count ? 0 : -EFAULT;
1983		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984			pgstev = 0;
1985		res[args->count++] = (pgstev >> 24) & 0x43;
1986		cur_gfn++;
1987	}
1988
1989	return 0;
1990}
1991
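/*
 * Find the guest frame number of the next page that is marked dirty in the
 * per-memslot CMMA dirty bitmaps, starting the search at cur_gfn.
 */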
1992static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993					      unsigned long cur_gfn)
1994{
1995	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997	unsigned long ofs = cur_gfn - ms->base_gfn;
1998
1999	if (ms->base_gfn + ms->npages <= cur_gfn) {
2000		slotidx--;
2001		/* If we are above the highest slot, wrap around */
2002		if (slotidx < 0)
2003			slotidx = slots->used_slots - 1;
2004
2005		ms = slots->memslots + slotidx;
2006		ofs = 0;
2007	}
2008
2009	if (cur_gfn < ms->base_gfn)
2010		ofs = 0;
2011
2012	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2013	while ((slotidx > 0) && (ofs >= ms->npages)) {
2014		slotidx--;
2015		ms = slots->memslots + slotidx;
2016		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2017	}
2018	return ms->base_gfn + ofs;
2019}
2020
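/*
 * Migration mode: save the CMMA page state starting at the next dirty page
 * and clear the corresponding dirty bits, until the buffer is full, the end
 * of memory is reached, or a large enough block of clean pages follows.
 */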
2021static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2022			     u8 *res, unsigned long bufsize)
2023{
2024	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2025	struct kvm_memslots *slots = kvm_memslots(kvm);
2026	struct kvm_memory_slot *ms;
2027
2028	if (unlikely(!slots->used_slots))
2029		return 0;
2030
2031	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2032	ms = gfn_to_memslot(kvm, cur_gfn);
2033	args->count = 0;
2034	args->start_gfn = cur_gfn;
2035	if (!ms)
2036		return 0;
2037	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2039
2040	while (args->count < bufsize) {
2041		hva = gfn_to_hva(kvm, cur_gfn);
2042		if (kvm_is_error_hva(hva))
2043			return 0;
2044		/* Decrement only if we actually flipped the bit to 0 */
2045		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2046			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2047		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2048			pgstev = 0;
2049		/* Save the value */
2050		res[args->count++] = (pgstev >> 24) & 0x43;
2051		/* If the next bit is too far away, stop. */
2052		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2053			return 0;
2054		/* If we reached the previous "next", find the next one */
2055		if (cur_gfn == next_gfn)
2056			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2057		/* Reached the end of memory or of the buffer, stop */
2058		if ((next_gfn >= mem_end) ||
2059		    (next_gfn - args->start_gfn >= bufsize))
2060			return 0;
2061		cur_gfn++;
2062		/* Reached the end of the current memslot, take the next one. */
2063		if (cur_gfn - ms->base_gfn >= ms->npages) {
2064			ms = gfn_to_memslot(kvm, cur_gfn);
2065			if (!ms)
2066				return 0;
2067		}
2068	}
2069	return 0;
2070}
2071
2072/*
2073 * This function searches for the next page with dirty CMMA attributes, and
2074 * saves the attributes in the buffer until either the end of the buffer is
2075 * reached or a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2076 * no trailing clean bytes are saved.
2077 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2078 * output buffer will indicate a length of 0.
2079 */
2080static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2081				  struct kvm_s390_cmma_log *args)
2082{
2083	unsigned long bufsize;
2084	int srcu_idx, peek, ret;
2085	u8 *values;
2086
2087	if (!kvm->arch.use_cmma)
2088		return -ENXIO;
2089	/* Invalid/unsupported flags were specified */
2090	if (args->flags & ~KVM_S390_CMMA_PEEK)
2091		return -EINVAL;
2092	/* Migration mode query, and we are not doing a migration */
2093	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2094	if (!peek && !kvm->arch.migration_mode)
2095		return -EINVAL;
2096	/* CMMA is disabled or was not used, or the buffer has length zero */
2097	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2098	if (!bufsize || !kvm->mm->context.uses_cmm) {
2099		memset(args, 0, sizeof(*args));
2100		return 0;
2101	}
2102	/* We are not peeking, and there are no dirty pages */
2103	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2104		memset(args, 0, sizeof(*args));
2105		return 0;
2106	}
2107
2108	values = vmalloc(bufsize);
2109	if (!values)
2110		return -ENOMEM;
2111
2112	mmap_read_lock(kvm->mm);
2113	srcu_idx = srcu_read_lock(&kvm->srcu);
2114	if (peek)
2115		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2116	else
2117		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2118	srcu_read_unlock(&kvm->srcu, srcu_idx);
2119	mmap_read_unlock(kvm->mm);
2120
2121	if (kvm->arch.migration_mode)
2122		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2123	else
2124		args->remaining = 0;
2125
2126	if (copy_to_user((void __user *)args->values, values, args->count))
2127		ret = -EFAULT;
2128
2129	vfree(values);
2130	return ret;
2131}
2132
2133/*
2134 * This function sets the CMMA attributes for the given pages. If the input
2135 * buffer has zero length, no action is taken; otherwise the attributes are
2136 * set and the mm->context.uses_cmm flag is set.
2137 */
2138static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2139				  const struct kvm_s390_cmma_log *args)
2140{
2141	unsigned long hva, mask, pgstev, i;
2142	uint8_t *bits;
2143	int srcu_idx, r = 0;
2144
2145	mask = args->mask;
2146
2147	if (!kvm->arch.use_cmma)
2148		return -ENXIO;
2149	/* invalid/unsupported flags */
2150	if (args->flags != 0)
2151		return -EINVAL;
2152	/* Enforce sane limit on memory allocation */
2153	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2154		return -EINVAL;
2155	/* Nothing to do */
2156	if (args->count == 0)
2157		return 0;
2158
2159	bits = vmalloc(array_size(sizeof(*bits), args->count));
2160	if (!bits)
2161		return -ENOMEM;
2162
2163	r = copy_from_user(bits, (void __user *)args->values, args->count);
2164	if (r) {
2165		r = -EFAULT;
2166		goto out;
2167	}
2168
2169	mmap_read_lock(kvm->mm);
2170	srcu_idx = srcu_read_lock(&kvm->srcu);
2171	for (i = 0; i < args->count; i++) {
2172		hva = gfn_to_hva(kvm, args->start_gfn + i);
2173		if (kvm_is_error_hva(hva)) {
2174			r = -EFAULT;
2175			break;
2176		}
2177
2178		pgstev = bits[i];
2179		pgstev = pgstev << 24;
2180		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2181		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2182	}
2183	srcu_read_unlock(&kvm->srcu, srcu_idx);
2184	mmap_read_unlock(kvm->mm);
2185
2186	if (!kvm->mm->context.uses_cmm) {
2187		mmap_write_lock(kvm->mm);
2188		kvm->mm->context.uses_cmm = 1;
2189		mmap_write_unlock(kvm->mm);
2190	}
2191out:
2192	vfree(bits);
2193	return r;
2194}
2195
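/*
 * Destroy the ultravisor CPU state of all vcpus of a VM, i.e. convert them
 * back to non-protected mode, and report the rc/rrc of the first failure.
 */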
2196static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2197{
2198	struct kvm_vcpu *vcpu;
2199	u16 rc, rrc;
2200	int ret = 0;
2201	int i;
2202
2203	/*
2204	 * We ignore failures and try to destroy as many CPUs as possible.
2205	 * At the same time we must not free the assigned resources when
2206	 * this fails, as the ultravisor still has access to that memory.
2207	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2208	 * behind.
2209	 * We want to return the first failure rc and rrc, though.
2210	 */
2211	kvm_for_each_vcpu(i, vcpu, kvm) {
2212		mutex_lock(&vcpu->mutex);
2213		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2214			*rcp = rc;
2215			*rrcp = rrc;
2216			ret = -EIO;
2217		}
2218		mutex_unlock(&vcpu->mutex);
2219	}
2220	return ret;
2221}
2222
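/*
 * Create the ultravisor CPU state for every vcpu of a VM; if one of them
 * fails, convert the already created ones back.
 */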
2223static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2224{
2225	int i, r = 0;
2226	u16 dummy;
2227
2228	struct kvm_vcpu *vcpu;
2229
2230	kvm_for_each_vcpu(i, vcpu, kvm) {
2231		mutex_lock(&vcpu->mutex);
2232		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2233		mutex_unlock(&vcpu->mutex);
2234		if (r)
2235			break;
2236	}
2237	if (r)
2238		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2239	return r;
2240}
2241
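/* Handle the subcommands of the KVM_S390_PV_COMMAND VM ioctl. */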
2242static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2243{
2244	int r = 0;
2245	u16 dummy;
2246	void __user *argp = (void __user *)cmd->data;
2247
2248	switch (cmd->cmd) {
2249	case KVM_PV_ENABLE: {
2250		r = -EINVAL;
2251		if (kvm_s390_pv_is_protected(kvm))
2252			break;
2253
2254		/*
2255		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2256		 *  esca, we need no cleanup in the error cases below
2257		 */
2258		r = sca_switch_to_extended(kvm);
2259		if (r)
2260			break;
2261
2262		mmap_write_lock(current->mm);
2263		r = gmap_mark_unmergeable();
2264		mmap_write_unlock(current->mm);
2265		if (r)
2266			break;
2267
2268		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2269		if (r)
2270			break;
2271
2272		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2273		if (r)
2274			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2275
2276		/* we need to block service interrupts from now on */
2277		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278		break;
2279	}
2280	case KVM_PV_DISABLE: {
2281		r = -EINVAL;
2282		if (!kvm_s390_pv_is_protected(kvm))
2283			break;
2284
2285		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2286		/*
2287		 * If a CPU could not be destroyed, destroying the VM will also fail.
2288		 * There is no point in trying to destroy it. Instead return
2289		 * the rc and rrc from the first CPU that failed to be destroyed.
2290		 */
2291		if (r)
2292			break;
2293		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2294
2295		/* no need to block service interrupts any more */
2296		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2297		break;
2298	}
2299	case KVM_PV_SET_SEC_PARMS: {
2300		struct kvm_s390_pv_sec_parm parms = {};
2301		void *hdr;
2302
2303		r = -EINVAL;
2304		if (!kvm_s390_pv_is_protected(kvm))
2305			break;
2306
2307		r = -EFAULT;
2308		if (copy_from_user(&parms, argp, sizeof(parms)))
2309			break;
2310
2311		/* Currently restricted to 8KB */
2312		r = -EINVAL;
2313		if (parms.length > PAGE_SIZE * 2)
2314			break;
2315
2316		r = -ENOMEM;
2317		hdr = vmalloc(parms.length);
2318		if (!hdr)
2319			break;
2320
2321		r = -EFAULT;
2322		if (!copy_from_user(hdr, (void __user *)parms.origin,
2323				    parms.length))
2324			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2325						      &cmd->rc, &cmd->rrc);
2326
2327		vfree(hdr);
2328		break;
2329	}
2330	case KVM_PV_UNPACK: {
2331		struct kvm_s390_pv_unp unp = {};
2332
2333		r = -EINVAL;
2334		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2335			break;
2336
2337		r = -EFAULT;
2338		if (copy_from_user(&unp, argp, sizeof(unp)))
2339			break;
2340
2341		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2342				       &cmd->rc, &cmd->rrc);
2343		break;
2344	}
2345	case KVM_PV_VERIFY: {
2346		r = -EINVAL;
2347		if (!kvm_s390_pv_is_protected(kvm))
2348			break;
2349
2350		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2352		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2353			     cmd->rrc);
2354		break;
2355	}
2356	case KVM_PV_PREP_RESET: {
2357		r = -EINVAL;
2358		if (!kvm_s390_pv_is_protected(kvm))
2359			break;
2360
2361		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2362				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2363		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2364			     cmd->rc, cmd->rrc);
2365		break;
2366	}
2367	case KVM_PV_UNSHARE_ALL: {
2368		r = -EINVAL;
2369		if (!kvm_s390_pv_is_protected(kvm))
2370			break;
2371
2372		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2373				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2374		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2375			     cmd->rc, cmd->rrc);
2376		break;
2377	}
2378	default:
2379		r = -ENOTTY;
2380	}
2381	return r;
2382}
2383
2384long kvm_arch_vm_ioctl(struct file *filp,
2385		       unsigned int ioctl, unsigned long arg)
2386{
2387	struct kvm *kvm = filp->private_data;
2388	void __user *argp = (void __user *)arg;
2389	struct kvm_device_attr attr;
2390	int r;
2391
2392	switch (ioctl) {
2393	case KVM_S390_INTERRUPT: {
2394		struct kvm_s390_interrupt s390int;
2395
2396		r = -EFAULT;
2397		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2398			break;
2399		r = kvm_s390_inject_vm(kvm, &s390int);
2400		break;
2401	}
2402	case KVM_CREATE_IRQCHIP: {
2403		struct kvm_irq_routing_entry routing;
2404
2405		r = -EINVAL;
2406		if (kvm->arch.use_irqchip) {
2407			/* Set up dummy routing. */
2408			memset(&routing, 0, sizeof(routing));
2409			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2410		}
2411		break;
2412	}
2413	case KVM_SET_DEVICE_ATTR: {
2414		r = -EFAULT;
2415		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416			break;
2417		r = kvm_s390_vm_set_attr(kvm, &attr);
2418		break;
2419	}
2420	case KVM_GET_DEVICE_ATTR: {
2421		r = -EFAULT;
2422		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2423			break;
2424		r = kvm_s390_vm_get_attr(kvm, &attr);
2425		break;
2426	}
2427	case KVM_HAS_DEVICE_ATTR: {
2428		r = -EFAULT;
2429		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2430			break;
2431		r = kvm_s390_vm_has_attr(kvm, &attr);
2432		break;
2433	}
2434	case KVM_S390_GET_SKEYS: {
2435		struct kvm_s390_skeys args;
2436
2437		r = -EFAULT;
2438		if (copy_from_user(&args, argp,
2439				   sizeof(struct kvm_s390_skeys)))
2440			break;
2441		r = kvm_s390_get_skeys(kvm, &args);
2442		break;
2443	}
2444	case KVM_S390_SET_SKEYS: {
2445		struct kvm_s390_skeys args;
2446
2447		r = -EFAULT;
2448		if (copy_from_user(&args, argp,
2449				   sizeof(struct kvm_s390_skeys)))
2450			break;
2451		r = kvm_s390_set_skeys(kvm, &args);
2452		break;
2453	}
2454	case KVM_S390_GET_CMMA_BITS: {
2455		struct kvm_s390_cmma_log args;
2456
2457		r = -EFAULT;
2458		if (copy_from_user(&args, argp, sizeof(args)))
2459			break;
2460		mutex_lock(&kvm->slots_lock);
2461		r = kvm_s390_get_cmma_bits(kvm, &args);
2462		mutex_unlock(&kvm->slots_lock);
2463		if (!r) {
2464			r = copy_to_user(argp, &args, sizeof(args));
2465			if (r)
2466				r = -EFAULT;
2467		}
2468		break;
2469	}
2470	case KVM_S390_SET_CMMA_BITS: {
2471		struct kvm_s390_cmma_log args;
2472
2473		r = -EFAULT;
2474		if (copy_from_user(&args, argp, sizeof(args)))
2475			break;
2476		mutex_lock(&kvm->slots_lock);
2477		r = kvm_s390_set_cmma_bits(kvm, &args);
2478		mutex_unlock(&kvm->slots_lock);
2479		break;
2480	}
2481	case KVM_S390_PV_COMMAND: {
2482		struct kvm_pv_cmd args;
2483
2484		/* protvirt means user sigp */
2485		kvm->arch.user_cpu_state_ctrl = 1;
2486		r = 0;
2487		if (!is_prot_virt_host()) {
2488			r = -EINVAL;
2489			break;
2490		}
2491		if (copy_from_user(&args, argp, sizeof(args))) {
2492			r = -EFAULT;
2493			break;
2494		}
2495		if (args.flags) {
2496			r = -EINVAL;
2497			break;
2498		}
2499		mutex_lock(&kvm->lock);
2500		r = kvm_s390_handle_pv(kvm, &args);
2501		mutex_unlock(&kvm->lock);
2502		if (copy_to_user(argp, &args, sizeof(args))) {
2503			r = -EFAULT;
2504			break;
2505		}
2506		break;
2507	}
2508	default:
2509		r = -ENOTTY;
2510	}
2511
2512	return r;
2513}
2514
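/*
 * Query the AP configuration information to check whether the AP extended
 * addressing (APXA) facility is installed on the host.
 */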
2515static int kvm_s390_apxa_installed(void)
2516{
2517	struct ap_config_info info;
2518
2519	if (ap_instructions_available()) {
2520		if (ap_qci(&info) == 0)
2521			return info.apxa;
2522	}
2523
2524	return 0;
2525}
2526
2527/*
2528 * The format of the crypto control block (CRYCB) is specified in the 3 low
2529 * order bits of the CRYCB designation (CRYCBD) field as follows:
2530 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2531 *	     AP extended addressing (APXA) facility are installed.
2532 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2533 * Format 2: Both the APXA and MSAX3 facilities are installed.
2534 */
2535static void kvm_s390_set_crycb_format(struct kvm *kvm)
2536{
2537	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2538
2539	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2540	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2541
2542	/* Check whether MSAX3 is installed */
2543	if (!test_kvm_facility(kvm, 76))
2544		return;
2545
2546	if (kvm_s390_apxa_installed())
2547		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2548	else
2549		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2550}
2551
2552void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2553			       unsigned long *aqm, unsigned long *adm)
2554{
2555	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2556
2557	mutex_lock(&kvm->lock);
2558	kvm_s390_vcpu_block_all(kvm);
2559
2560	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2561	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2562		memcpy(crycb->apcb1.apm, apm, 32);
2563		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2564			 apm[0], apm[1], apm[2], apm[3]);
2565		memcpy(crycb->apcb1.aqm, aqm, 32);
2566		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2567			 aqm[0], aqm[1], aqm[2], aqm[3]);
2568		memcpy(crycb->apcb1.adm, adm, 32);
2569		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2570			 adm[0], adm[1], adm[2], adm[3]);
2571		break;
2572	case CRYCB_FORMAT1:
2573	case CRYCB_FORMAT0: /* Fall through; both use APCB0 */
2574		memcpy(crycb->apcb0.apm, apm, 8);
2575		memcpy(crycb->apcb0.aqm, aqm, 2);
2576		memcpy(crycb->apcb0.adm, adm, 2);
2577		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2578			 apm[0], *((unsigned short *)aqm),
2579			 *((unsigned short *)adm));
2580		break;
2581	default:	/* Cannot happen */
2582		break;
2583	}
2584
2585	/* recreate the shadow crycb for each vcpu */
2586	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587	kvm_s390_vcpu_unblock_all(kvm);
2588	mutex_unlock(&kvm->lock);
2589}
2590EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2591
2592void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2593{
2594	mutex_lock(&kvm->lock);
2595	kvm_s390_vcpu_block_all(kvm);
2596
2597	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2598	       sizeof(kvm->arch.crypto.crycb->apcb0));
2599	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2600	       sizeof(kvm->arch.crypto.crycb->apcb1));
2601
2602	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2603	/* recreate the shadow crycb for each vcpu */
2604	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2605	kvm_s390_vcpu_unblock_all(kvm);
2606	mutex_unlock(&kvm->lock);
2607}
2608EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2609
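/*
 * The default guest cpuid is the host cpuid with the version byte
 * replaced by 0xff.
 */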
2610static u64 kvm_s390_get_initial_cpuid(void)
2611{
2612	struct cpuid cpuid;
2613
2614	get_cpu_id(&cpuid);
2615	cpuid.version = 0xff;
2616	return *((u64 *) &cpuid);
2617}
2618
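/*
 * Initialize the VM crypto attributes: select the CRYCB format and, if the
 * MSAX3 facility (76) is available, enable AES and DEA protected key
 * handling with freshly generated wrapping key masks.
 */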
2619static void kvm_s390_crypto_init(struct kvm *kvm)
2620{
2621	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2622	kvm_s390_set_crycb_format(kvm);
2623
2624	if (!test_kvm_facility(kvm, 76))
2625		return;
2626
2627	/* Enable AES/DEA protected key functions by default */
2628	kvm->arch.crypto.aes_kw = 1;
2629	kvm->arch.crypto.dea_kw = 1;
2630	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2631			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2632	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2633			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2634}
2635
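/* Free the basic or extended system control area (SCA) of the VM. */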
2636static void sca_dispose(struct kvm *kvm)
2637{
2638	if (kvm->arch.use_esca)
2639		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2640	else
2641		free_page((unsigned long)(kvm->arch.sca));
2642	kvm->arch.sca = NULL;
2643}
2644
2645int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2646{
2647	gfp_t alloc_flags = GFP_KERNEL;
2648	int i, rc;
2649	char debug_name[16];
2650	static unsigned long sca_offset;
2651
2652	rc = -EINVAL;
2653#ifdef CONFIG_KVM_S390_UCONTROL
2654	if (type & ~KVM_VM_S390_UCONTROL)
2655		goto out_err;
2656	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2657		goto out_err;
2658#else
2659	if (type)
2660		goto out_err;
2661#endif
2662
2663	rc = s390_enable_sie();
2664	if (rc)
2665		goto out_err;
2666
2667	rc = -ENOMEM;
2668
2669	if (!sclp.has_64bscao)
2670		alloc_flags |= GFP_DMA;
2671	rwlock_init(&kvm->arch.sca_lock);
2672	/* start with basic SCA */
2673	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2674	if (!kvm->arch.sca)
2675		goto out_err;
2676	mutex_lock(&kvm_lock);
2677	sca_offset += 16;
2678	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2679		sca_offset = 0;
2680	kvm->arch.sca = (struct bsca_block *)
2681			((char *) kvm->arch.sca + sca_offset);
2682	mutex_unlock(&kvm_lock);
2683
2684	sprintf(debug_name, "kvm-%u", current->pid);
2685
2686	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2687	if (!kvm->arch.dbf)
2688		goto out_err;
2689
2690	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2691	kvm->arch.sie_page2 =
2692	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2693	if (!kvm->arch.sie_page2)
2694		goto out_err;
2695
2696	kvm->arch.sie_page2->kvm = kvm;
2697	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2698
2699	for (i = 0; i < kvm_s390_fac_size(); i++) {
2700		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2701					      (kvm_s390_fac_base[i] |
2702					       kvm_s390_fac_ext[i]);
2703		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2704					      kvm_s390_fac_base[i];
2705	}
2706	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2707
2708	/* we are always in czam mode - even on pre z14 machines */
2709	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2710	set_kvm_facility(kvm->arch.model.fac_list, 138);
2711	/* we emulate STHYI in kvm */
2712	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2713	set_kvm_facility(kvm->arch.model.fac_list, 74);
2714	if (MACHINE_HAS_TLB_GUEST) {
2715		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2716		set_kvm_facility(kvm->arch.model.fac_list, 147);
2717	}
2718
2719	if (css_general_characteristics.aiv && test_facility(65))
2720		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2721
2722	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2723	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2724
2725	kvm_s390_crypto_init(kvm);
2726
2727	mutex_init(&kvm->arch.float_int.ais_lock);
2728	spin_lock_init(&kvm->arch.float_int.lock);
2729	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2730		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2731	init_waitqueue_head(&kvm->arch.ipte_wq);
2732	mutex_init(&kvm->arch.ipte_mutex);
2733
2734	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2735	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2736
2737	if (type & KVM_VM_S390_UCONTROL) {
2738		kvm->arch.gmap = NULL;
2739		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2740	} else {
2741		if (sclp.hamax == U64_MAX)
2742			kvm->arch.mem_limit = TASK_SIZE_MAX;
2743		else
2744			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2745						    sclp.hamax + 1);
2746		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2747		if (!kvm->arch.gmap)
2748			goto out_err;
2749		kvm->arch.gmap->private = kvm;
2750		kvm->arch.gmap->pfault_enabled = 0;
2751	}
2752
2753	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2754	kvm->arch.use_skf = sclp.has_skey;
2755	spin_lock_init(&kvm->arch.start_stop_lock);
2756	kvm_s390_vsie_init(kvm);
2757	if (use_gisa)
2758		kvm_s390_gisa_init(kvm);
2759	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2760
2761	return 0;
2762out_err:
2763	free_page((unsigned long)kvm->arch.sie_page2);
2764	debug_unregister(kvm->arch.dbf);
2765	sca_dispose(kvm);
2766	KVM_EVENT(3, "creation of vm failed: %d", rc);
2767	return rc;
2768}
2769
2770void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2771{
2772	u16 rc, rrc;
2773
2774	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2775	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2776	kvm_s390_clear_local_irqs(vcpu);
2777	kvm_clear_async_pf_completion_queue(vcpu);
2778	if (!kvm_is_ucontrol(vcpu->kvm))
2779		sca_del_vcpu(vcpu);
2780
2781	if (kvm_is_ucontrol(vcpu->kvm))
2782		gmap_remove(vcpu->arch.gmap);
2783
2784	if (vcpu->kvm->arch.use_cmma)
2785		kvm_s390_vcpu_unsetup_cmma(vcpu);
2786	/* We cannot hold the vcpu mutex here; we are already dying */
2787	if (kvm_s390_pv_cpu_get_handle(vcpu))
2788		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2789	free_page((unsigned long)(vcpu->arch.sie_block));
2790}
2791
2792static void kvm_free_vcpus(struct kvm *kvm)
2793{
2794	unsigned int i;
2795	struct kvm_vcpu *vcpu;
2796
2797	kvm_for_each_vcpu(i, vcpu, kvm)
2798		kvm_vcpu_destroy(vcpu);
2799
2800	mutex_lock(&kvm->lock);
2801	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2802		kvm->vcpus[i] = NULL;
2803
2804	atomic_set(&kvm->online_vcpus, 0);
2805	mutex_unlock(&kvm->lock);
2806}
2807
2808void kvm_arch_destroy_vm(struct kvm *kvm)
2809{
2810	u16 rc, rrc;
2811
2812	kvm_free_vcpus(kvm);
2813	sca_dispose(kvm);
2814	kvm_s390_gisa_destroy(kvm);
2815	/*
2816	 * We are already at the end of life and kvm->lock is not taken.
2817	 * This is ok as the file descriptor is closed by now and nobody
2818	 * can mess with the pv state. To keep lockdep_assert_held from
2819	 * complaining, we do not use kvm_s390_pv_is_protected.
2820	 */
2821	if (kvm_s390_pv_get_handle(kvm))
2822		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2823	debug_unregister(kvm->arch.dbf);
2824	free_page((unsigned long)kvm->arch.sie_page2);
2825	if (!kvm_is_ucontrol(kvm))
2826		gmap_remove(kvm->arch.gmap);
2827	kvm_s390_destroy_adapters(kvm);
2828	kvm_s390_clear_float_irqs(kvm);
2829	kvm_s390_vsie_destroy(kvm);
2830	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2831}
2832
2833/* Section: vcpu related */
2834static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2835{
2836	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2837	if (!vcpu->arch.gmap)
2838		return -ENOMEM;
2839	vcpu->arch.gmap->private = vcpu->kvm;
2840
2841	return 0;
2842}
2843
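/* Remove a vcpu from the SCA of its VM by clearing its MCN bit and SDA entry. */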
2844static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2845{
2846	if (!kvm_s390_use_sca_entries())
2847		return;
2848	read_lock(&vcpu->kvm->arch.sca_lock);
2849	if (vcpu->kvm->arch.use_esca) {
2850		struct esca_block *sca = vcpu->kvm->arch.sca;
2851
2852		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2853		sca->cpu[vcpu->vcpu_id].sda = 0;
2854	} else {
2855		struct bsca_block *sca = vcpu->kvm->arch.sca;
2856
2857		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2858		sca->cpu[vcpu->vcpu_id].sda = 0;
2859	}
2860	read_unlock(&vcpu->kvm->arch.sca_lock);
2861}
2862
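/* Enter a vcpu into the SCA of its VM and point its SIE control block at the SCA. */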
2863static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2864{
2865	if (!kvm_s390_use_sca_entries()) {
2866		struct bsca_block *sca = vcpu->kvm->arch.sca;
2867
2868		/* we still need the basic sca for the ipte control */
2869		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2871		return;
2872	}
2873	read_lock(&vcpu->kvm->arch.sca_lock);
2874	if (vcpu->kvm->arch.use_esca) {
2875		struct esca_block *sca = vcpu->kvm->arch.sca;
2876
2877		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2878		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2879		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2880		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2881		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2882	} else {
2883		struct bsca_block *sca = vcpu->kvm->arch.sca;
2884
2885		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2886		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2887		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2888		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2889	}
2890	read_unlock(&vcpu->kvm->arch.sca_lock);
2891}
2892
2893/* Basic SCA to Extended SCA data copy routines */
2894static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2895{
2896	d->sda = s->sda;
2897	d->sigp_ctrl.c = s->sigp_ctrl.c;
2898	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2899}
2900
2901static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2902{
2903	int i;
2904
2905	d->ipte_control = s->ipte_control;
2906	d->mcn[0] = s->mcn;
2907	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2908		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2909}
2910
2911static int sca_switch_to_extended(struct kvm *kvm)
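/*
 * Replace the basic SCA of a VM with an extended SCA, copying all entries
 * and retargeting the SIE control blocks of existing vcpus while they are
 * blocked.
 */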
2912{
2913	struct bsca_block *old_sca = kvm->arch.sca;
2914	struct esca_block *new_sca;
2915	struct kvm_vcpu *vcpu;
2916	unsigned int vcpu_idx;
2917	u32 scaol, scaoh;
2918
2919	if (kvm->arch.use_esca)
2920		return 0;
2921
2922	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2923	if (!new_sca)
2924		return -ENOMEM;
2925
2926	scaoh = (u32)((u64)(new_sca) >> 32);
2927	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2928
2929	kvm_s390_vcpu_block_all(kvm);
2930	write_lock(&kvm->arch.sca_lock);
2931
2932	sca_copy_b_to_e(new_sca, old_sca);
2933
2934	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2935		vcpu->arch.sie_block->scaoh = scaoh;
2936		vcpu->arch.sie_block->scaol = scaol;
2937		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2938	}
2939	kvm->arch.sca = new_sca;
2940	kvm->arch.use_esca = 1;
2941
2942	write_unlock(&kvm->arch.sca_lock);
2943	kvm_s390_vcpu_unblock_all(kvm);
2944
2945	free_page((unsigned long)old_sca);
2946
2947	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2948		 old_sca, kvm->arch.sca);
2949	return 0;
2950}
2951
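/*
 * Check whether a vcpu with the given id can be added, switching to the
 * extended SCA if necessary (and possible).
 */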
2952static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2953{
2954	int rc;
2955
2956	if (!kvm_s390_use_sca_entries()) {
2957		if (id < KVM_MAX_VCPUS)
2958			return true;
2959		return false;
2960	}
2961	if (id < KVM_S390_BSCA_CPU_SLOTS)
2962		return true;
2963	if (!sclp.has_esca || !sclp.has_64bscao)
2964		return false;
2965
2966	mutex_lock(&kvm->lock);
2967	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2968	mutex_unlock(&kvm->lock);
2969
2970	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2971}
2972
2973/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975{
2976	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2977	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2978	vcpu->arch.cputm_start = get_tod_clock_fast();
2979	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2980}
2981
2982/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2983static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2984{
2985	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2986	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2987	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2988	vcpu->arch.cputm_start = 0;
2989	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990}
2991
2992/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994{
2995	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2996	vcpu->arch.cputm_enabled = true;
2997	__start_cpu_timer_accounting(vcpu);
2998}
2999
3000/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3001static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3002{
3003	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3004	__stop_cpu_timer_accounting(vcpu);
3005	vcpu->arch.cputm_enabled = false;
3006}
3007
3008static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009{
3010	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3011	__enable_cpu_timer_accounting(vcpu);
3012	preempt_enable();
3013}
3014
3015static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016{
3017	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3018	__disable_cpu_timer_accounting(vcpu);
3019	preempt_enable();
3020}
3021
3022/* set the cpu timer - may only be called from the VCPU thread itself */
3023void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3024{
3025	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027	if (vcpu->arch.cputm_enabled)
3028		vcpu->arch.cputm_start = get_tod_clock_fast();
3029	vcpu->arch.sie_block->cputm = cputm;
3030	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3031	preempt_enable();
3032}
3033
3034/* update and get the cpu timer - can also be called from other VCPU threads */
3035__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3036{
3037	unsigned int seq;
3038	__u64 value;
3039
3040	if (unlikely(!vcpu->arch.cputm_enabled))
3041		return vcpu->arch.sie_block->cputm;
3042
3043	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3044	do {
3045		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3046		/*
3047		 * If the writer were ever to execute a read in the critical
3048		 * section, e.g. in irq context, we would have a deadlock.
3049		 */
3050		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3051		value = vcpu->arch.sie_block->cputm;
3052		/* if cputm_start is 0, accounting is being started/stopped */
3053		if (likely(vcpu->arch.cputm_start))
3054			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3055	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3056	preempt_enable();
3057	return value;
3058}
3059
3060void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3061{
3062
3063	gmap_enable(vcpu->arch.enabled_gmap);
3064	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3065	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3066		__start_cpu_timer_accounting(vcpu);
3067	vcpu->cpu = cpu;
3068}
3069
3070void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3071{
3072	vcpu->cpu = -1;
3073	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3074		__stop_cpu_timer_accounting(vcpu);
3075	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3076	vcpu->arch.enabled_gmap = gmap_get_enabled();
3077	gmap_disable(vcpu->arch.enabled_gmap);
3078
3079}
3080
3081void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3082{
3083	mutex_lock(&vcpu->kvm->lock);
3084	preempt_disable();
3085	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3086	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3087	preempt_enable();
3088	mutex_unlock(&vcpu->kvm->lock);
3089	if (!kvm_is_ucontrol(vcpu->kvm)) {
3090		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3091		sca_add_vcpu(vcpu);
3092	}
3093	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3094		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3095	/* make vcpu_load load the right gmap on the first trigger */
3096	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3097}
3098
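/*
 * Check whether a PCKMO subfunction is available in the guest CPU model
 * as well as on the host.
 */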
3099static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3100{
3101	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3102	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3103		return true;
3104	return false;
3105}
3106
3107static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3108{
3109	/* At least one ECC subfunction must be present */
3110	return kvm_has_pckmo_subfunc(kvm, 32) ||
3111	       kvm_has_pckmo_subfunc(kvm, 33) ||
3112	       kvm_has_pckmo_subfunc(kvm, 34) ||
3113	       kvm_has_pckmo_subfunc(kvm, 40) ||
3114	       kvm_has_pckmo_subfunc(kvm, 41);
3115
3116}
3117
3118static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3119{
3120	/*
3121	 * If the AP instructions are not being interpreted and the MSAX3
3122	 * facility is not configured for the guest, there is nothing to set up.
3123	 */
3124	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3125		return;
3126
3127	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3128	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3129	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3130	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3131
3132	if (vcpu->kvm->arch.crypto.apie)
3133		vcpu->arch.sie_block->eca |= ECA_APIE;
3134
3135	/* Set up protected key support */
3136	if (vcpu->kvm->arch.crypto.aes_kw) {
3137		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3138		/* ecc is also wrapped with AES key */
3139		if (kvm_has_pckmo_ecc(vcpu->kvm))
3140			vcpu->arch.sie_block->ecd |= ECD_ECC;
3141	}
3142
3143	if (vcpu->kvm->arch.crypto.dea_kw)
3144		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3145}
3146
3147void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3148{
3149	free_page(vcpu->arch.sie_block->cbrlo);
3150	vcpu->arch.sie_block->cbrlo = 0;
3151}
3152
3153int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3154{
3155	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3156	if (!vcpu->arch.sie_block->cbrlo)
3157		return -ENOMEM;
3158	return 0;
3159}
3160
3161static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3162{
3163	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3164
3165	vcpu->arch.sie_block->ibc = model->ibc;
3166	if (test_kvm_facility(vcpu->kvm, 7))
3167		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3168}
3169
3170static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3171{
3172	int rc = 0;
3173	u16 uvrc, uvrrc;
3174
3175	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3176						    CPUSTAT_SM |
3177						    CPUSTAT_STOPPED);
3178
3179	if (test_kvm_facility(vcpu->kvm, 78))
3180		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3181	else if (test_kvm_facility(vcpu->kvm, 8))
3182		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3183
3184	kvm_s390_vcpu_setup_model(vcpu);
3185
3186	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3187	if (MACHINE_HAS_ESOP)
3188		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3189	if (test_kvm_facility(vcpu->kvm, 9))
3190		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3191	if (test_kvm_facility(vcpu->kvm, 73))
3192		vcpu->arch.sie_block->ecb |= ECB_TE;
3193
3194	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3195		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3196	if (test_kvm_facility(vcpu->kvm, 130))
3197		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3198	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3199	if (sclp.has_cei)
3200		vcpu->arch.sie_block->eca |= ECA_CEI;
3201	if (sclp.has_ib)
3202		vcpu->arch.sie_block->eca |= ECA_IB;
3203	if (sclp.has_siif)
3204		vcpu->arch.sie_block->eca |= ECA_SII;
3205	if (sclp.has_sigpif)
3206		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3207	if (test_kvm_facility(vcpu->kvm, 129)) {
3208		vcpu->arch.sie_block->eca |= ECA_VX;
3209		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3210	}
3211	if (test_kvm_facility(vcpu->kvm, 139))
3212		vcpu->arch.sie_block->ecd |= ECD_MEF;
3213	if (test_kvm_facility(vcpu->kvm, 156))
3214		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3215	if (vcpu->arch.sie_block->gd) {
3216		vcpu->arch.sie_block->eca |= ECA_AIV;
3217		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3218			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3219	}
3220	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3221					| SDNXC;
3222	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3223
3224	if (sclp.has_kss)
3225		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3226	else
3227		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3228
3229	if (vcpu->kvm->arch.use_cmma) {
3230		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3231		if (rc)
3232			return rc;
3233	}
3234	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3235	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3236
3237	vcpu->arch.sie_block->hpid = HPID_KVM;
3238
3239	kvm_s390_vcpu_crypto_setup(vcpu);
3240
3241	mutex_lock(&vcpu->kvm->lock);
3242	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3243		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3244		if (rc)
3245			kvm_s390_vcpu_unsetup_cmma(vcpu);
3246	}
3247	mutex_unlock(&vcpu->kvm->lock);
3248
3249	return rc;
3250}
3251
3252int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3253{
3254	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3255		return -EINVAL;
3256	return 0;
3257}
3258
3259int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3260{
3261	struct sie_page *sie_page;
3262	int rc;
3263
3264	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3265	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3266	if (!sie_page)
3267		return -ENOMEM;
3268
3269	vcpu->arch.sie_block = &sie_page->sie_block;
3270	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3271
3272	/* the real guest size will always be smaller than msl */
3273	vcpu->arch.sie_block->mso = 0;
3274	vcpu->arch.sie_block->msl = sclp.hamax;
3275
3276	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3277	spin_lock_init(&vcpu->arch.local_int.lock);
3278	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3279	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3280		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3281	seqcount_init(&vcpu->arch.cputm_seqcount);
3282
3283	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3284	kvm_clear_async_pf_completion_queue(vcpu);
3285	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3286				    KVM_SYNC_GPRS |
3287				    KVM_SYNC_ACRS |
3288				    KVM_SYNC_CRS |
3289				    KVM_SYNC_ARCH0 |
3290				    KVM_SYNC_PFAULT |
3291				    KVM_SYNC_DIAG318;
3292	kvm_s390_set_prefix(vcpu, 0);
3293	if (test_kvm_facility(vcpu->kvm, 64))
3294		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3295	if (test_kvm_facility(vcpu->kvm, 82))
3296		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3297	if (test_kvm_facility(vcpu->kvm, 133))
3298		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3299	if (test_kvm_facility(vcpu->kvm, 156))
3300		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3301	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3302	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3303	 */
3304	if (MACHINE_HAS_VX)
3305		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3306	else
3307		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3308
3309	if (kvm_is_ucontrol(vcpu->kvm)) {
3310		rc = __kvm_ucontrol_vcpu_init(vcpu);
3311		if (rc)
3312			goto out_free_sie_block;
3313	}
3314
3315	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3316		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3317	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3318
3319	rc = kvm_s390_vcpu_setup(vcpu);
3320	if (rc)
3321		goto out_ucontrol_uninit;
3322	return 0;
3323
3324out_ucontrol_uninit:
3325	if (kvm_is_ucontrol(vcpu->kvm))
3326		gmap_remove(vcpu->arch.gmap);
3327out_free_sie_block:
3328	free_page((unsigned long)(vcpu->arch.sie_block));
3329	return rc;
3330}
3331
3332int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3333{
3334	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3335	return kvm_s390_vcpu_has_irq(vcpu, 0);
3336}
3337
3338bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3339{
3340	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3341}
3342
3343void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3344{
3345	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3346	exit_sie(vcpu);
3347}
3348
3349void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3350{
3351	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3352}
3353
3354static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3355{
3356	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357	exit_sie(vcpu);
3358}
3359
3360bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3361{
3362	return atomic_read(&vcpu->arch.sie_block->prog20) &
3363	       (PROG_BLOCK_SIE | PROG_REQUEST);
3364}
3365
3366static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3367{
3368	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3369}
3370
3371/*
3372 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3373 * If the CPU is not running (e.g. waiting as idle), the function will
3374 * return immediately. */
3375void exit_sie(struct kvm_vcpu *vcpu)
3376{
3377	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3378	kvm_s390_vsie_kick(vcpu);
3379	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3380		cpu_relax();
3381}
3382
3383/* Kick a guest cpu out of SIE to process a request synchronously */
3384void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3385{
3386	kvm_make_request(req, vcpu);
3387	kvm_s390_vcpu_request(vcpu);
3388}
3389
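/*
 * gmap invalidation notifier: if the invalidated range overlaps the prefix
 * pages of a vcpu, request an MMU reload for that vcpu.
 */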
3390static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3391			      unsigned long end)
3392{
3393	struct kvm *kvm = gmap->private;
3394	struct kvm_vcpu *vcpu;
3395	unsigned long prefix;
3396	int i;
3397
3398	if (gmap_is_shadow(gmap))
3399		return;
3400	if (start >= 1UL << 31)
3401		/* We are only interested in prefix pages */
3402		return;
3403	kvm_for_each_vcpu(i, vcpu, kvm) {
3404		/* match against both prefix pages */
3405		prefix = kvm_s390_get_prefix(vcpu);
3406		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3407			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3408				   start, end);
3409			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3410		}
3411	}
3412}
3413
3414bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3415{
3416	/* do not poll with more than halt_poll_max_steal percent of steal time */
3417	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3418	    halt_poll_max_steal) {
3419		vcpu->stat.halt_no_poll_steal++;
3420		return true;
3421	}
3422	return false;
3423}
3424
3425int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3426{
3427	/* kvm common code refers to this, but never calls it */
3428	BUG();
3429	return 0;
3430}
3431
3432static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3433					   struct kvm_one_reg *reg)
3434{
3435	int r = -EINVAL;
3436
3437	switch (reg->id) {
3438	case KVM_REG_S390_TODPR:
3439		r = put_user(vcpu->arch.sie_block->todpr,
3440			     (u32 __user *)reg->addr);
3441		break;
3442	case KVM_REG_S390_EPOCHDIFF:
3443		r = put_user(vcpu->arch.sie_block->epoch,
3444			     (u64 __user *)reg->addr);
3445		break;
3446	case KVM_REG_S390_CPU_TIMER:
3447		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3448			     (u64 __user *)reg->addr);
3449		break;
3450	case KVM_REG_S390_CLOCK_COMP:
3451		r = put_user(vcpu->arch.sie_block->ckc,
3452			     (u64 __user *)reg->addr);
3453		break;
3454	case KVM_REG_S390_PFTOKEN:
3455		r = put_user(vcpu->arch.pfault_token,
3456			     (u64 __user *)reg->addr);
3457		break;
3458	case KVM_REG_S390_PFCOMPARE:
3459		r = put_user(vcpu->arch.pfault_compare,
3460			     (u64 __user *)reg->addr);
3461		break;
3462	case KVM_REG_S390_PFSELECT:
3463		r = put_user(vcpu->arch.pfault_select,
3464			     (u64 __user *)reg->addr);
3465		break;
3466	case KVM_REG_S390_PP:
3467		r = put_user(vcpu->arch.sie_block->pp,
3468			     (u64 __user *)reg->addr);
3469		break;
3470	case KVM_REG_S390_GBEA:
3471		r = put_user(vcpu->arch.sie_block->gbea,
3472			     (u64 __user *)reg->addr);
3473		break;
3474	default:
3475		break;
3476	}
3477
3478	return r;
3479}
3480
3481static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3482					   struct kvm_one_reg *reg)
3483{
3484	int r = -EINVAL;
3485	__u64 val;
3486
3487	switch (reg->id) {
3488	case KVM_REG_S390_TODPR:
3489		r = get_user(vcpu->arch.sie_block->todpr,
3490			     (u32 __user *)reg->addr);
3491		break;
3492	case KVM_REG_S390_EPOCHDIFF:
3493		r = get_user(vcpu->arch.sie_block->epoch,
3494			     (u64 __user *)reg->addr);
3495		break;
3496	case KVM_REG_S390_CPU_TIMER:
3497		r = get_user(val, (u64 __user *)reg->addr);
3498		if (!r)
3499			kvm_s390_set_cpu_timer(vcpu, val);
3500		break;
3501	case KVM_REG_S390_CLOCK_COMP:
3502		r = get_user(vcpu->arch.sie_block->ckc,
3503			     (u64 __user *)reg->addr);
3504		break;
3505	case KVM_REG_S390_PFTOKEN:
3506		r = get_user(vcpu->arch.pfault_token,
3507			     (u64 __user *)reg->addr);
3508		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3509			kvm_clear_async_pf_completion_queue(vcpu);
3510		break;
3511	case KVM_REG_S390_PFCOMPARE:
3512		r = get_user(vcpu->arch.pfault_compare,
3513			     (u64 __user *)reg->addr);
3514		break;
3515	case KVM_REG_S390_PFSELECT:
3516		r = get_user(vcpu->arch.pfault_select,
3517			     (u64 __user *)reg->addr);
3518		break;
3519	case KVM_REG_S390_PP:
3520		r = get_user(vcpu->arch.sie_block->pp,
3521			     (u64 __user *)reg->addr);
3522		break;
3523	case KVM_REG_S390_GBEA:
3524		r = get_user(vcpu->arch.sie_block->gbea,
3525			     (u64 __user *)reg->addr);
3526		break;
3527	default:
3528		break;
3529	}
3530
3531	return r;
3532}
3533
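/*
 * The three reset handlers below implement the architected CPU resets in
 * increasing scope: normal reset < initial reset < clear reset, with each
 * stronger variant calling the weaker one first.
 */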
3534static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3535{
3536	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3537	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3538	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3539
3540	kvm_clear_async_pf_completion_queue(vcpu);
3541	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3542		kvm_s390_vcpu_stop(vcpu);
3543	kvm_s390_clear_local_irqs(vcpu);
3544}
3545
3546static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3547{
3548	/* Initial reset is a superset of the normal reset */
3549	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3550
3551	/*
3552	 * This equals the initial cpu reset in the POP, but we don't switch
3553	 * to ESA. We reset not only the internal data, but also ...
3554	 */
3555	vcpu->arch.sie_block->gpsw.mask = 0;
3556	vcpu->arch.sie_block->gpsw.addr = 0;
3557	kvm_s390_set_prefix(vcpu, 0);
3558	kvm_s390_set_cpu_timer(vcpu, 0);
3559	vcpu->arch.sie_block->ckc = 0;
3560	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3561	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3562	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3563
3564	/* ... the data in sync regs */
3565	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3566	vcpu->run->s.regs.ckc = 0;
3567	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3568	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3569	vcpu->run->psw_addr = 0;
3570	vcpu->run->psw_mask = 0;
3571	vcpu->run->s.regs.todpr = 0;
3572	vcpu->run->s.regs.cputm = 0;
3573	vcpu->run->s.regs.ckc = 0;
3574	vcpu->run->s.regs.pp = 0;
3575	vcpu->run->s.regs.gbea = 1;
3576	vcpu->run->s.regs.fpc = 0;
3577	/*
3578	 * Do not reset these registers in the protected case, as some of
3579	 * them are overlaid and they are not accessible in this case
3580	 * anyway.
3581	 */
3582	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3583		vcpu->arch.sie_block->gbea = 1;
3584		vcpu->arch.sie_block->pp = 0;
3585		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3586		vcpu->arch.sie_block->todpr = 0;
3587	}
3588}
3589
3590static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3591{
3592	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3593
3594	/* Clear reset is a superset of the initial reset */
3595	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3596
3597	memset(&regs->gprs, 0, sizeof(regs->gprs));
3598	memset(&regs->vrs, 0, sizeof(regs->vrs));
3599	memset(&regs->acrs, 0, sizeof(regs->acrs));
3600	memset(&regs->gscb, 0, sizeof(regs->gscb));
3601
3602	regs->etoken = 0;
3603	regs->etoken_extension = 0;
3604}
3605
3606int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3607{
3608	vcpu_load(vcpu);
3609	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3610	vcpu_put(vcpu);
3611	return 0;
3612}
3613
3614int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3615{
3616	vcpu_load(vcpu);
3617	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3618	vcpu_put(vcpu);
3619	return 0;
3620}
3621
3622int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3623				  struct kvm_sregs *sregs)
3624{
3625	vcpu_load(vcpu);
3626
3627	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3628	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3629
3630	vcpu_put(vcpu);
3631	return 0;
3632}
3633
3634int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3635				  struct kvm_sregs *sregs)
3636{
3637	vcpu_load(vcpu);
3638
3639	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3640	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3641
3642	vcpu_put(vcpu);
3643	return 0;
3644}
3645
3646int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3647{
3648	int ret = 0;
3649
3650	vcpu_load(vcpu);
3651
3652	vcpu->run->s.regs.fpc = fpu->fpc;
3653	if (MACHINE_HAS_VX)
3654		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3655				 (freg_t *) fpu->fprs);
3656	else
3657		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3658
3659	vcpu_put(vcpu);
3660	return ret;
3661}
3662
3663int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3664{
3665	vcpu_load(vcpu);
3666
3667	/* make sure we have the latest values */
3668	save_fpu_regs();
3669	if (MACHINE_HAS_VX)
3670		convert_vx_to_fp((freg_t *) fpu->fprs,
3671				 (__vector128 *) vcpu->run->s.regs.vrs);
3672	else
3673		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3674	fpu->fpc = vcpu->run->s.regs.fpc;
3675
3676	vcpu_put(vcpu);
3677	return 0;
3678}
3679
3680static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3681{
3682	int rc = 0;
3683
3684	if (!is_vcpu_stopped(vcpu))
3685		rc = -EBUSY;
3686	else {
3687		vcpu->run->psw_mask = psw.mask;
3688		vcpu->run->psw_addr = psw.addr;
3689	}
3690	return rc;
3691}
3692
3693int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3694				  struct kvm_translation *tr)
3695{
3696	return -EINVAL; /* not implemented yet */
3697}
3698
3699#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3700			      KVM_GUESTDBG_USE_HW_BP | \
3701			      KVM_GUESTDBG_ENABLE)
3702
3703int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3704					struct kvm_guest_debug *dbg)
3705{
3706	int rc = 0;
3707
3708	vcpu_load(vcpu);
3709
3710	vcpu->guest_debug = 0;
3711	kvm_s390_clear_bp_data(vcpu);
3712
3713	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3714		rc = -EINVAL;
3715		goto out;
3716	}
3717	if (!sclp.has_gpere) {
3718		rc = -EINVAL;
3719		goto out;
3720	}
3721
3722	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3723		vcpu->guest_debug = dbg->control;
3724		/* enforce guest PER */
3725		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3726
3727		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3728			rc = kvm_s390_import_bp_data(vcpu, dbg);
3729	} else {
3730		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3731		vcpu->arch.guestdbg.last_bp = 0;
3732	}
3733
3734	if (rc) {
3735		vcpu->guest_debug = 0;
3736		kvm_s390_clear_bp_data(vcpu);
3737		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3738	}
3739
3740out:
3741	vcpu_put(vcpu);
3742	return rc;
3743}
3744
3745int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3746				    struct kvm_mp_state *mp_state)
3747{
3748	int ret;
3749
3750	vcpu_load(vcpu);
3751
3752	/* CHECK_STOP and LOAD are not supported yet */
3753	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3754				      KVM_MP_STATE_OPERATING;
3755
3756	vcpu_put(vcpu);
3757	return ret;
3758}
3759
3760int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3761				    struct kvm_mp_state *mp_state)
3762{
3763	int rc = 0;
3764
3765	vcpu_load(vcpu);
3766
3767	/* user space knows about this interface - let it control the state */
3768	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3769
3770	switch (mp_state->mp_state) {
3771	case KVM_MP_STATE_STOPPED:
3772		rc = kvm_s390_vcpu_stop(vcpu);
3773		break;
3774	case KVM_MP_STATE_OPERATING:
3775		rc = kvm_s390_vcpu_start(vcpu);
3776		break;
3777	case KVM_MP_STATE_LOAD:
3778		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3779			rc = -ENXIO;
3780			break;
3781		}
3782		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3783		break;
3784	case KVM_MP_STATE_CHECK_STOP:
3785		fallthrough;	/* CHECK_STOP is not supported yet */
3786	default:
3787		rc = -ENXIO;
3788	}
3789
3790	vcpu_put(vcpu);
3791	return rc;
3792}
3793
3794static bool ibs_enabled(struct kvm_vcpu *vcpu)
3795{
3796	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3797}
3798
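/*
 * Process all requests that were posted for this vcpu (prefix remap, TLB
 * flush, IBS toggling, migration start/stop, ...). After handling a request
 * we restart from the top, so requests raised in the meantime are also seen
 * before we re-enter SIE.
 */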
3799static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3800{
3801retry:
3802	kvm_s390_vcpu_request_handled(vcpu);
3803	if (!kvm_request_pending(vcpu))
3804		return 0;
3805	/*
3806	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3807	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3808	 * This ensures that the ipte instruction for this request has
3809	 * already finished. We might race against a second unmapper that
3810	 * wants to set the blocking bit. Let's just retry the request loop.
3811	 */
3812	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3813		int rc;
3814		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3815					  kvm_s390_get_prefix(vcpu),
3816					  PAGE_SIZE * 2, PROT_WRITE);
3817		if (rc) {
3818			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3819			return rc;
3820		}
3821		goto retry;
3822	}
3823
3824	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3825		vcpu->arch.sie_block->ihcpu = 0xffff;
3826		goto retry;
3827	}
3828
3829	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3830		if (!ibs_enabled(vcpu)) {
3831			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3832			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3833		}
3834		goto retry;
3835	}
3836
3837	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3838		if (ibs_enabled(vcpu)) {
3839			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3840			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3841		}
3842		goto retry;
3843	}
3844
3845	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3846		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3847		goto retry;
3848	}
3849
3850	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3851		/*
3852		 * Disable CMM virtualization; we will emulate the ESSA
3853		 * instruction manually, in order to provide additional
3854		 * functionalities needed for live migration.
3855		 */
3856		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3857		goto retry;
3858	}
3859
3860	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3861		/*
3862		 * Re-enable CMM virtualization if CMMA is available and
3863		 * CMM has been used.
3864		 */
3865		if ((vcpu->kvm->arch.use_cmma) &&
3866		    (vcpu->kvm->mm->context.uses_cmm))
3867			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3868		goto retry;
3869	}
3870
3871	/* nothing to do, just clear the request */
3872	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3873	/* we left the vsie handler, nothing to do, just clear the request */
3874	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3875
3876	return 0;
3877}
3878
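/*
 * Set the guest TOD clock by recomputing the epoch (guest TOD - host TOD)
 * and, with the multiple-epoch facility (139), the epoch index, then
 * propagate both values to all vcpus while they are blocked.
 */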
3879static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3880{
3881	struct kvm_vcpu *vcpu;
3882	struct kvm_s390_tod_clock_ext htod;
3883	int i;
3884
3885	preempt_disable();
3886
3887	get_tod_clock_ext((char *)&htod);
3888
3889	kvm->arch.epoch = gtod->tod - htod.tod;
3890	kvm->arch.epdx = 0;
3891	if (test_kvm_facility(kvm, 139)) {
3892		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3893		if (kvm->arch.epoch > gtod->tod)
3894			kvm->arch.epdx -= 1;
3895	}
3896
3897	kvm_s390_vcpu_block_all(kvm);
3898	kvm_for_each_vcpu(i, vcpu, kvm) {
3899		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3900		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3901	}
3902
3903	kvm_s390_vcpu_unblock_all(kvm);
3904	preempt_enable();
3905}
3906
3907int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3908{
3909	if (!mutex_trylock(&kvm->lock))
3910		return 0;
3911	__kvm_s390_set_tod_clock(kvm, gtod);
3912	mutex_unlock(&kvm->lock);
3913	return 1;
3914}
3915
3916/**
3917 * kvm_arch_fault_in_page - fault-in guest page if necessary
3918 * @vcpu: The corresponding virtual cpu
3919 * @gpa: Guest physical address
3920 * @writable: Whether the page should be writable or not
3921 *
3922 * Make sure that a guest page has been faulted-in on the host.
3923 *
3924 * Return: Zero on success, negative error code otherwise.
3925 */
3926long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3927{
3928	return gmap_fault(vcpu->arch.gmap, gpa,
3929			  writable ? FAULT_FLAG_WRITE : 0);
3930}
3931
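/*
 * Deliver the pfault token to the guest: INIT as a local vcpu interrupt when
 * the fault is first encountered, DONE as a floating interrupt once the page
 * has been made available.
 */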
3932static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3933				      unsigned long token)
3934{
3935	struct kvm_s390_interrupt inti;
3936	struct kvm_s390_irq irq;
3937
3938	if (start_token) {
3939		irq.u.ext.ext_params2 = token;
3940		irq.type = KVM_S390_INT_PFAULT_INIT;
3941		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3942	} else {
3943		inti.type = KVM_S390_INT_PFAULT_DONE;
3944		inti.parm64 = token;
3945		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3946	}
3947}
3948
3949bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3950				     struct kvm_async_pf *work)
3951{
3952	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3953	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3954
3955	return true;
3956}
3957
3958void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3959				 struct kvm_async_pf *work)
3960{
3961	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3962	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3963}
3964
3965void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3966			       struct kvm_async_pf *work)
3967{
3968	/* s390 will always inject the page directly */
3969}
3970
3971bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3972{
3973	/*
3974	 * s390 will always inject the page directly,
3975	 * but we still want check_async_completion to clean up
3976	 */
3977	return true;
3978}
3979
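/*
 * Decide whether the current host fault can be handled asynchronously via
 * the guest pfault handshake. This requires a valid pfault token, matching
 * PSW mask bits, enabled external interrupts, no other pending interrupt
 * and pfault enabled on the gmap; otherwise the caller falls back to a
 * synchronous fault-in.
 */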
3980static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3981{
3982	hva_t hva;
3983	struct kvm_arch_async_pf arch;
3984
3985	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3986		return false;
3987	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3988	    vcpu->arch.pfault_compare)
3989		return false;
3990	if (psw_extint_disabled(vcpu))
3991		return false;
3992	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3993		return false;
3994	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3995		return false;
3996	if (!vcpu->arch.gmap->pfault_enabled)
3997		return false;
3998
3999	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4000	hva += current->thread.gmap_addr & ~PAGE_MASK;
4001	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4002		return false;
4003
4004	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4005}
4006
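/*
 * Prepare a vcpu for entering SIE: finish async pfault housekeeping, deliver
 * pending interrupts, process posted requests and, if guest debugging is
 * active, patch the PER control registers.
 */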
4007static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4008{
4009	int rc, cpuflags;
4010
4011	/*
4012	 * On s390 notifications for arriving pages will be delivered directly
4013	 * to the guest, but the housekeeping for completed pfaults is
4014	 * handled outside the worker.
4015	 */
4016	kvm_check_async_pf_completion(vcpu);
4017
4018	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4019	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4020
4021	if (need_resched())
4022		schedule();
4023
4024	if (!kvm_is_ucontrol(vcpu->kvm)) {
4025		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4026		if (rc)
4027			return rc;
4028	}
4029
4030	rc = kvm_s390_handle_requests(vcpu);
4031	if (rc)
4032		return rc;
4033
4034	if (guestdbg_enabled(vcpu)) {
4035		kvm_s390_backup_guest_per_regs(vcpu);
4036		kvm_s390_patch_guest_per_regs(vcpu);
4037	}
4038
4039	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4040
4041	vcpu->arch.sie_block->icptcode = 0;
4042	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4043	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4044	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4045
4046	return 0;
4047}
4048
4049static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4050{
4051	struct kvm_s390_pgm_info pgm_info = {
4052		.code = PGM_ADDRESSING,
4053	};
4054	u8 opcode, ilen;
4055	int rc;
4056
4057	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4058	trace_kvm_s390_sie_fault(vcpu);
4059
4060	/*
4061	 * We want to inject an addressing exception, which is defined as a
4062	 * suppressing or terminating exception. However, since we came here
4063	 * by a DAT access exception, the PSW still points to the faulting
4064	 * instruction since DAT exceptions are nullifying. So we've got
4065	 * to look up the current opcode to get the length of the instruction
4066	 * to be able to forward the PSW.
4067	 */
4068	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4069	ilen = insn_length(opcode);
4070	if (rc < 0) {
4071		return rc;
4072	} else if (rc) {
4073		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4074		 * Forward by arbitrary ilc, injection will take care of
4075		 * nullification if necessary.
4076		 */
4077		pgm_info = vcpu->arch.pgm;
4078		ilen = 4;
4079	}
4080	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4081	kvm_s390_forward_psw(vcpu, ilen);
4082	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4083}
4084
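/*
 * Post-process a SIE exit: forward machine checks, let the intercept
 * handlers run, and translate host faults into either an async pfault,
 * a synchronous fault-in or an addressing exception for the guest.
 */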
4085static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4086{
4087	struct mcck_volatile_info *mcck_info;
4088	struct sie_page *sie_page;
4089
4090	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4091		   vcpu->arch.sie_block->icptcode);
4092	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4093
4094	if (guestdbg_enabled(vcpu))
4095		kvm_s390_restore_guest_per_regs(vcpu);
4096
4097	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4098	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4099
4100	if (exit_reason == -EINTR) {
4101		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4102		sie_page = container_of(vcpu->arch.sie_block,
4103					struct sie_page, sie_block);
4104		mcck_info = &sie_page->mcck_info;
4105		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4106		return 0;
4107	}
4108
4109	if (vcpu->arch.sie_block->icptcode > 0) {
4110		int rc = kvm_handle_sie_intercept(vcpu);
4111
4112		if (rc != -EOPNOTSUPP)
4113			return rc;
4114		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4115		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4116		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4117		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4118		return -EREMOTE;
4119	} else if (exit_reason != -EFAULT) {
4120		vcpu->stat.exit_null++;
4121		return 0;
4122	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4123		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4124		vcpu->run->s390_ucontrol.trans_exc_code =
4125						current->thread.gmap_addr;
4126		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4127		return -EREMOTE;
4128	} else if (current->thread.gmap_pfault) {
4129		trace_kvm_s390_major_guest_pfault(vcpu);
4130		current->thread.gmap_pfault = 0;
4131		if (kvm_arch_setup_async_pf(vcpu))
4132			return 0;
4133		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4134	}
4135	return vcpu_post_run_fault_in_sie(vcpu);
4136}
4137
4138#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
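/*
 * The inner run loop: keep entering SIE until an exit needs userspace,
 * a signal arrives or guest debugging wants to exit. kvm->srcu is dropped
 * around the actual SIE entry.
 */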
4139static int __vcpu_run(struct kvm_vcpu *vcpu)
4140{
4141	int rc, exit_reason;
4142	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4143
4144	/*
4145	 * We try to hold kvm->srcu during most of vcpu_run (except when
4146	 * running the guest), so that memslots (and other stuff) are protected.
4147	 */
4148	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4149
4150	do {
4151		rc = vcpu_pre_run(vcpu);
4152		if (rc)
4153			break;
4154
4155		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4156		/*
4157		 * As PF_VCPU will be used in the fault handler, there must be
4158		 * no uaccess between guest_enter and guest_exit.
4159		 */
4160		local_irq_disable();
4161		guest_enter_irqoff();
4162		__disable_cpu_timer_accounting(vcpu);
4163		local_irq_enable();
4164		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4165			memcpy(sie_page->pv_grregs,
4166			       vcpu->run->s.regs.gprs,
4167			       sizeof(sie_page->pv_grregs));
4168		}
4169		exit_reason = sie64a(vcpu->arch.sie_block,
4170				     vcpu->run->s.regs.gprs);
4171		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4172			memcpy(vcpu->run->s.regs.gprs,
4173			       sie_page->pv_grregs,
4174			       sizeof(sie_page->pv_grregs));
4175			/*
4176			 * We're not allowed to inject interrupts on intercepts
4177			 * that leave the guest state in an "in-between" state
4178			 * where the next SIE entry will do a continuation.
4179			 * Fence interrupts in our "internal" PSW.
4180			 */
4181			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4182			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4183				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4184			}
4185		}
4186		local_irq_disable();
4187		__enable_cpu_timer_accounting(vcpu);
4188		guest_exit_irqoff();
4189		local_irq_enable();
4190		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4191
4192		rc = vcpu_post_run(vcpu, exit_reason);
4193	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4194
4195	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4196	return rc;
4197}
4198
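/*
 * Sync the format-2 only parts of the register state from kvm_run into the
 * SIE control block. This is skipped for protected guests, where most of
 * this state is owned by the ultravisor.
 */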
4199static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4200{
4201	struct kvm_run *kvm_run = vcpu->run;
4202	struct runtime_instr_cb *riccb;
4203	struct gs_cb *gscb;
4204
4205	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4206	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4207	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4208	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4209	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4210		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4211		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4212		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4213	}
4214	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4215		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4216		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4217		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4218		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4219			kvm_clear_async_pf_completion_queue(vcpu);
4220	}
4221	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4222		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4223		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4224	}
4225	/*
4226	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4227	 * we should enable RI here instead of doing the lazy enablement.
4228	 */
4229	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4230	    test_kvm_facility(vcpu->kvm, 64) &&
4231	    riccb->v &&
4232	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4233		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4234		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4235	}
4236	/*
4237	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4238	 * we should enable GS here instead of doing the lazy enablement.
4239	 */
4240	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4241	    test_kvm_facility(vcpu->kvm, 133) &&
4242	    gscb->gssm &&
4243	    !vcpu->arch.gs_enabled) {
4244		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4245		vcpu->arch.sie_block->ecb |= ECB_GS;
4246		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4247		vcpu->arch.gs_enabled = 1;
4248	}
4249	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4250	    test_kvm_facility(vcpu->kvm, 82)) {
4251		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4252		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4253	}
4254	if (MACHINE_HAS_GS) {
4255		preempt_disable();
4256		__ctl_set_bit(2, 4);
4257		if (current->thread.gs_cb) {
4258			vcpu->arch.host_gscb = current->thread.gs_cb;
4259			save_gs_cb(vcpu->arch.host_gscb);
4260		}
4261		if (vcpu->arch.gs_enabled) {
4262			current->thread.gs_cb = (struct gs_cb *)
4263						&vcpu->run->s.regs.gscb;
4264			restore_gs_cb(current->thread.gs_cb);
4265		}
4266		preempt_enable();
4267	}
4268	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4269}
4270
4271static void sync_regs(struct kvm_vcpu *vcpu)
4272{
4273	struct kvm_run *kvm_run = vcpu->run;
4274
4275	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4276		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4277	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4278		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4279		/* some control register changes require a tlb flush */
4280		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4281	}
4282	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4283		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4284		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4285	}
4286	save_access_regs(vcpu->arch.host_acrs);
4287	restore_access_regs(vcpu->run->s.regs.acrs);
4288	/* save host (userspace) fprs/vrs */
4289	save_fpu_regs();
4290	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4291	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4292	if (MACHINE_HAS_VX)
4293		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4294	else
4295		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4296	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4297	if (test_fp_ctl(current->thread.fpu.fpc))
4298		/* User space provided an invalid FPC, let's clear it */
4299		current->thread.fpu.fpc = 0;
4300
4301	/* Sync fmt2 only data */
4302	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4303		sync_regs_fmt2(vcpu);
4304	} else {
4305		/*
4306		 * In several places we have to modify our internal view to
4307		 * not do things that are disallowed by the ultravisor. For
4308		 * example we must not inject interrupts after specific exits
4309		 * (e.g. 112 prefix page not secure). We do this by turning
4310		 * off the machine check, external and I/O interrupt bits
4311		 * of our PSW copy. To avoid getting validity intercepts, we
4312		 * only accept the condition code from userspace.
4313		 */
4314		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4315		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4316						   PSW_MASK_CC;
4317	}
4318
4319	kvm_run->kvm_dirty_regs = 0;
4320}
4321
4322static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4323{
4324	struct kvm_run *kvm_run = vcpu->run;
4325
4326	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4327	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4328	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4329	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4330	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4331	if (MACHINE_HAS_GS) {
4332		preempt_disable();
4333		__ctl_set_bit(2, 4);
4334		if (vcpu->arch.gs_enabled)
4335			save_gs_cb(current->thread.gs_cb);
4336		current->thread.gs_cb = vcpu->arch.host_gscb;
4337		restore_gs_cb(vcpu->arch.host_gscb);
4338		if (!vcpu->arch.host_gscb)
4339			__ctl_clear_bit(2, 4);
4340		vcpu->arch.host_gscb = NULL;
4341		preempt_enable();
4342	}
4343	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4344}
4345
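/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run so that userspace sees the values at the time of the exit.
 */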
4346static void store_regs(struct kvm_vcpu *vcpu)
4347{
4348	struct kvm_run *kvm_run = vcpu->run;
4349
4350	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4351	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4352	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4353	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4354	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4355	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4356	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4357	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4358	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4359	save_access_regs(vcpu->run->s.regs.acrs);
4360	restore_access_regs(vcpu->arch.host_acrs);
4361	/* Save guest register state */
4362	save_fpu_regs();
4363	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4364	/* Restore will be done lazily at return */
4365	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4366	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4367	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4368		store_regs_fmt2(vcpu);
4369}
4370
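/*
 * Entry point for the KVM_RUN ioctl: sync registers from userspace, run the
 * vcpu until userspace interaction is required and store the registers back.
 */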
4371int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4372{
4373	struct kvm_run *kvm_run = vcpu->run;
4374	int rc;
4375
4376	if (kvm_run->immediate_exit)
4377		return -EINTR;
4378
4379	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4380	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4381		return -EINVAL;
4382
4383	vcpu_load(vcpu);
4384
4385	if (guestdbg_exit_pending(vcpu)) {
4386		kvm_s390_prepare_debug_exit(vcpu);
4387		rc = 0;
4388		goto out;
4389	}
4390
4391	kvm_sigset_activate(vcpu);
4392
4393	/*
4394	 * No need to check the return value of kvm_s390_vcpu_start(): it can only
4395	 * fail for protvirt, and protvirt implies user-controlled cpu state.
4396	 */
4397	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4398		kvm_s390_vcpu_start(vcpu);
4399	} else if (is_vcpu_stopped(vcpu)) {
4400		pr_err_ratelimited("can't run stopped vcpu %d\n",
4401				   vcpu->vcpu_id);
4402		rc = -EINVAL;
4403		goto out;
4404	}
4405
4406	sync_regs(vcpu);
4407	enable_cpu_timer_accounting(vcpu);
4408
4409	might_fault();
4410	rc = __vcpu_run(vcpu);
4411
4412	if (signal_pending(current) && !rc) {
4413		kvm_run->exit_reason = KVM_EXIT_INTR;
4414		rc = -EINTR;
4415	}
4416
4417	if (guestdbg_exit_pending(vcpu) && !rc)  {
4418		kvm_s390_prepare_debug_exit(vcpu);
4419		rc = 0;
4420	}
4421
4422	if (rc == -EREMOTE) {
4423		/* userspace support is needed, kvm_run has been prepared */
4424		rc = 0;
4425	}
4426
4427	disable_cpu_timer_accounting(vcpu);
4428	store_regs(vcpu);
4429
4430	kvm_sigset_deactivate(vcpu);
4431
4432	vcpu->stat.exit_userspace++;
4433out:
4434	vcpu_put(vcpu);
4435	return rc;
4436}
4437
4438/*
4439 * store status at address
4440 * we have two special cases:
4441 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4442 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4443 */
4444int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4445{
4446	unsigned char archmode = 1;
4447	freg_t fprs[NUM_FPRS];
4448	unsigned int px;
4449	u64 clkcomp, cputm;
4450	int rc;
4451
4452	px = kvm_s390_get_prefix(vcpu);
4453	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4454		if (write_guest_abs(vcpu, 163, &archmode, 1))
4455			return -EFAULT;
4456		gpa = 0;
4457	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4458		if (write_guest_real(vcpu, 163, &archmode, 1))
4459			return -EFAULT;
4460		gpa = px;
4461	} else
4462		gpa -= __LC_FPREGS_SAVE_AREA;
4463
4464	/* manually convert vector registers if necessary */
4465	if (MACHINE_HAS_VX) {
4466		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4467		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4468				     fprs, 128);
4469	} else {
4470		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4471				     vcpu->run->s.regs.fprs, 128);
4472	}
4473	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4474			      vcpu->run->s.regs.gprs, 128);
4475	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4476			      &vcpu->arch.sie_block->gpsw, 16);
4477	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4478			      &px, 4);
4479	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4480			      &vcpu->run->s.regs.fpc, 4);
4481	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4482			      &vcpu->arch.sie_block->todpr, 4);
4483	cputm = kvm_s390_get_cpu_timer(vcpu);
4484	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4485			      &cputm, 8);
4486	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4487	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4488			      &clkcomp, 8);
4489	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4490			      &vcpu->run->s.regs.acrs, 64);
4491	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4492			      &vcpu->arch.sie_block->gcr, 128);
4493	return rc ? -EFAULT : 0;
4494}
4495
4496int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4497{
4498	/*
4499	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4500	 * switch in the run ioctl. Let's update our copies before we save
4501	 * them into the save area.
4502	 */
4503	save_fpu_regs();
4504	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4505	save_access_regs(vcpu->run->s.regs.acrs);
4506
4507	return kvm_s390_store_status_unloaded(vcpu, addr);
4508}
4509
4510static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4511{
4512	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4513	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4514}
4515
4516static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4517{
4518	unsigned int i;
4519	struct kvm_vcpu *vcpu;
4520
4521	kvm_for_each_vcpu(i, vcpu, kvm) {
4522		__disable_ibs_on_vcpu(vcpu);
4523	}
4524}
4525
4526static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4527{
4528	if (!sclp.has_ibs)
4529		return;
4530	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4531	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4532}
4533
4534int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4535{
4536	int i, online_vcpus, r = 0, started_vcpus = 0;
4537
4538	if (!is_vcpu_stopped(vcpu))
4539		return 0;
4540
4541	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4542	/* Only one cpu at a time may enter/leave the STOPPED state. */
4543	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4544	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4545
4546	/* Let's tell the UV that we want to change into the operating state */
4547	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4548		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4549		if (r) {
4550			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4551			return r;
4552		}
4553	}
4554
4555	for (i = 0; i < online_vcpus; i++) {
4556		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4557			started_vcpus++;
4558	}
4559
4560	if (started_vcpus == 0) {
4561		/* we're the only active VCPU -> speed it up */
4562		__enable_ibs_on_vcpu(vcpu);
4563	} else if (started_vcpus == 1) {
4564		/*
4565		 * As we are starting a second VCPU, we have to disable
4566		 * the IBS facility on all VCPUs to remove potentially
4567		 * outstanding ENABLE requests.
4568		 */
4569		__disable_ibs_on_all_vcpus(vcpu->kvm);
4570	}
4571
4572	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4573	/*
4574	 * The real PSW might have changed due to a RESTART interpreted by the
4575	 * ultravisor. We block all interrupts and let the next sie exit
4576	 * refresh our view.
4577	 */
4578	if (kvm_s390_pv_cpu_is_protected(vcpu))
4579		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4580	/*
4581	 * Another VCPU might have used IBS while we were offline.
4582	 * Let's play safe and flush the VCPU at startup.
4583	 */
4584	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4585	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4586	return 0;
4587}
4588
4589int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4590{
4591	int i, online_vcpus, r = 0, started_vcpus = 0;
4592	struct kvm_vcpu *started_vcpu = NULL;
4593
4594	if (is_vcpu_stopped(vcpu))
4595		return 0;
4596
4597	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4598	/* Only one cpu at a time may enter/leave the STOPPED state. */
4599	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4600	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4601
4602	/* Let's tell the UV that we want to change into the stopped state */
4603	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4604		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4605		if (r) {
4606			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4607			return r;
4608		}
4609	}
4610
4611	/*
4612	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4613	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4614	 * have been fully processed. This will ensure that the VCPU
4615	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4616	 */
4617	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4618	kvm_s390_clear_stop_irq(vcpu);
4619
4620	__disable_ibs_on_vcpu(vcpu);
4621
4622	for (i = 0; i < online_vcpus; i++) {
4623		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4624			started_vcpus++;
4625			started_vcpu = vcpu->kvm->vcpus[i];
4626		}
4627	}
4628
4629	if (started_vcpus == 1) {
4630		/*
4631		 * As we only have one VCPU left, we want to enable the
4632		 * IBS facility for that VCPU to speed it up.
4633		 */
4634		__enable_ibs_on_vcpu(started_vcpu);
4635	}
4636
4637	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4638	return 0;
4639}
4640
4641static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4642				     struct kvm_enable_cap *cap)
4643{
4644	int r;
4645
4646	if (cap->flags)
4647		return -EINVAL;
4648
4649	switch (cap->cap) {
4650	case KVM_CAP_S390_CSS_SUPPORT:
4651		if (!vcpu->kvm->arch.css_support) {
4652			vcpu->kvm->arch.css_support = 1;
4653			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4654			trace_kvm_s390_enable_css(vcpu->kvm);
4655		}
4656		r = 0;
4657		break;
4658	default:
4659		r = -EINVAL;
4660		break;
4661	}
4662	return r;
4663}
4664
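/*
 * KVM_S390_MEM_OP on the SIDA of a protected guest. Regular guest memory is
 * not accessible for protected guests; only the SIDA can be read or written.
 */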
4665static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4666				   struct kvm_s390_mem_op *mop)
4667{
4668	void __user *uaddr = (void __user *)mop->buf;
4669	int r = 0;
4670
4671	if (mop->flags || !mop->size)
4672		return -EINVAL;
4673	if (mop->size + mop->sida_offset < mop->size)
4674		return -EINVAL;
4675	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4676		return -E2BIG;
4677	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4678		return -EINVAL;
4679
4680	switch (mop->op) {
4681	case KVM_S390_MEMOP_SIDA_READ:
4682		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4683				 mop->sida_offset), mop->size))
4684			r = -EFAULT;
4685
4686		break;
4687	case KVM_S390_MEMOP_SIDA_WRITE:
4688		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4689				   mop->sida_offset), uaddr, mop->size))
4690			r = -EFAULT;
4691		break;
4692	}
4693	return r;
4694}
4695static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4696				  struct kvm_s390_mem_op *mop)
4697{
4698	void __user *uaddr = (void __user *)mop->buf;
4699	void *tmpbuf = NULL;
4700	int r = 0;
4701	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4702				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4703
4704	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4705		return -EINVAL;
4706
4707	if (mop->size > MEM_OP_MAX_SIZE)
4708		return -E2BIG;
4709
4710	if (kvm_s390_pv_cpu_is_protected(vcpu))
4711		return -EINVAL;
4712
4713	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4714		tmpbuf = vmalloc(mop->size);
4715		if (!tmpbuf)
4716			return -ENOMEM;
4717	}
4718
4719	switch (mop->op) {
4720	case KVM_S390_MEMOP_LOGICAL_READ:
4721		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4722			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4723					    mop->size, GACC_FETCH);
4724			break;
4725		}
4726		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4727		if (r == 0) {
4728			if (copy_to_user(uaddr, tmpbuf, mop->size))
4729				r = -EFAULT;
4730		}
4731		break;
4732	case KVM_S390_MEMOP_LOGICAL_WRITE:
4733		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4734			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4735					    mop->size, GACC_STORE);
4736			break;
4737		}
4738		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4739			r = -EFAULT;
4740			break;
4741		}
4742		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4743		break;
4744	}
4745
4746	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4747		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4748
4749	vfree(tmpbuf);
4750	return r;
4751}
4752
4753static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4754				      struct kvm_s390_mem_op *mop)
4755{
4756	int r, srcu_idx;
4757
4758	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4759
4760	switch (mop->op) {
4761	case KVM_S390_MEMOP_LOGICAL_READ:
4762	case KVM_S390_MEMOP_LOGICAL_WRITE:
4763		r = kvm_s390_guest_mem_op(vcpu, mop);
4764		break;
4765	case KVM_S390_MEMOP_SIDA_READ:
4766	case KVM_S390_MEMOP_SIDA_WRITE:
4767		/* the vcpu->mutex protects us against the sida going away */
4768		r = kvm_s390_guest_sida_op(vcpu, mop);
4769		break;
4770	default:
4771		r = -EINVAL;
4772	}
4773
4774	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4775	return r;
4776}
4777
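/*
 * Ioctls handled here are called without taking the vcpu mutex, so only
 * interrupt injection (which has its own locking) is allowed.
 */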
4778long kvm_arch_vcpu_async_ioctl(struct file *filp,
4779			       unsigned int ioctl, unsigned long arg)
4780{
4781	struct kvm_vcpu *vcpu = filp->private_data;
4782	void __user *argp = (void __user *)arg;
4783
4784	switch (ioctl) {
4785	case KVM_S390_IRQ: {
4786		struct kvm_s390_irq s390irq;
4787
4788		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4789			return -EFAULT;
4790		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4791	}
4792	case KVM_S390_INTERRUPT: {
4793		struct kvm_s390_interrupt s390int;
4794		struct kvm_s390_irq s390irq = {};
4795
4796		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4797			return -EFAULT;
4798		if (s390int_to_s390irq(&s390int, &s390irq))
4799			return -EINVAL;
4800		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4801	}
4802	}
4803	return -ENOIOCTLCMD;
4804}
4805
4806long kvm_arch_vcpu_ioctl(struct file *filp,
4807			 unsigned int ioctl, unsigned long arg)
4808{
4809	struct kvm_vcpu *vcpu = filp->private_data;
4810	void __user *argp = (void __user *)arg;
4811	int idx;
4812	long r;
4813	u16 rc, rrc;
4814
4815	vcpu_load(vcpu);
4816
4817	switch (ioctl) {
4818	case KVM_S390_STORE_STATUS:
4819		idx = srcu_read_lock(&vcpu->kvm->srcu);
4820		r = kvm_s390_store_status_unloaded(vcpu, arg);
4821		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4822		break;
4823	case KVM_S390_SET_INITIAL_PSW: {
4824		psw_t psw;
4825
4826		r = -EFAULT;
4827		if (copy_from_user(&psw, argp, sizeof(psw)))
4828			break;
4829		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4830		break;
4831	}
4832	case KVM_S390_CLEAR_RESET:
4833		r = 0;
4834		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4835		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4836			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4837					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4838			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4839				   rc, rrc);
4840		}
4841		break;
4842	case KVM_S390_INITIAL_RESET:
4843		r = 0;
4844		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4845		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4846			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4847					  UVC_CMD_CPU_RESET_INITIAL,
4848					  &rc, &rrc);
4849			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4850				   rc, rrc);
4851		}
4852		break;
4853	case KVM_S390_NORMAL_RESET:
4854		r = 0;
4855		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4856		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4857			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4858					  UVC_CMD_CPU_RESET, &rc, &rrc);
4859			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4860				   rc, rrc);
4861		}
4862		break;
4863	case KVM_SET_ONE_REG:
4864	case KVM_GET_ONE_REG: {
4865		struct kvm_one_reg reg;
4866		r = -EINVAL;
4867		if (kvm_s390_pv_cpu_is_protected(vcpu))
4868			break;
4869		r = -EFAULT;
4870		if (copy_from_user(&reg, argp, sizeof(reg)))
4871			break;
4872		if (ioctl == KVM_SET_ONE_REG)
4873			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4874		else
4875			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4876		break;
4877	}
4878#ifdef CONFIG_KVM_S390_UCONTROL
4879	case KVM_S390_UCAS_MAP: {
4880		struct kvm_s390_ucas_mapping ucasmap;
4881
4882		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4883			r = -EFAULT;
4884			break;
4885		}
4886
4887		if (!kvm_is_ucontrol(vcpu->kvm)) {
4888			r = -EINVAL;
4889			break;
4890		}
4891
4892		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4893				     ucasmap.vcpu_addr, ucasmap.length);
4894		break;
4895	}
4896	case KVM_S390_UCAS_UNMAP: {
4897		struct kvm_s390_ucas_mapping ucasmap;
4898
4899		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4900			r = -EFAULT;
4901			break;
4902		}
4903
4904		if (!kvm_is_ucontrol(vcpu->kvm)) {
4905			r = -EINVAL;
4906			break;
4907		}
4908
4909		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4910			ucasmap.length);
4911		break;
4912	}
4913#endif
4914	case KVM_S390_VCPU_FAULT: {
4915		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4916		break;
4917	}
4918	case KVM_ENABLE_CAP:
4919	{
4920		struct kvm_enable_cap cap;
4921		r = -EFAULT;
4922		if (copy_from_user(&cap, argp, sizeof(cap)))
4923			break;
4924		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4925		break;
4926	}
4927	case KVM_S390_MEM_OP: {
4928		struct kvm_s390_mem_op mem_op;
4929
4930		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4931			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4932		else
4933			r = -EFAULT;
4934		break;
4935	}
4936	case KVM_S390_SET_IRQ_STATE: {
4937		struct kvm_s390_irq_state irq_state;
4938
4939		r = -EFAULT;
4940		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4941			break;
4942		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4943		    irq_state.len == 0 ||
4944		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4945			r = -EINVAL;
4946			break;
4947		}
4948		/* do not use irq_state.flags, it will break old QEMUs */
4949		r = kvm_s390_set_irq_state(vcpu,
4950					   (void __user *) irq_state.buf,
4951					   irq_state.len);
4952		break;
4953	}
4954	case KVM_S390_GET_IRQ_STATE: {
4955		struct kvm_s390_irq_state irq_state;
4956
4957		r = -EFAULT;
4958		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4959			break;
4960		if (irq_state.len == 0) {
4961			r = -EINVAL;
4962			break;
4963		}
4964		/* do not use irq_state.flags, it will break old QEMUs */
4965		r = kvm_s390_get_irq_state(vcpu,
4966					   (__u8 __user *)  irq_state.buf,
4967					   irq_state.len);
4968		break;
4969	}
4970	default:
4971		r = -ENOTTY;
4972	}
4973
4974	vcpu_put(vcpu);
4975	return r;
4976}
4977
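/*
 * For user controlled virtual machines the SIE control block can be mapped
 * into userspace by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET.
 */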
4978vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4979{
4980#ifdef CONFIG_KVM_S390_UCONTROL
4981	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4982		 && (kvm_is_ucontrol(vcpu->kvm))) {
4983		vmf->page = virt_to_page(vcpu->arch.sie_block);
4984		get_page(vmf->page);
4985		return 0;
4986	}
4987#endif
4988	return VM_FAULT_SIGBUS;
4989}
4990
4991/* Section: memory related */
4992int kvm_arch_prepare_memory_region(struct kvm *kvm,
4993				   struct kvm_memory_slot *memslot,
4994				   const struct kvm_userspace_memory_region *mem,
4995				   enum kvm_mr_change change)
4996{
4997	/* A few sanity checks. Memory slots have to start and end at a segment
4998	   boundary (1 MB). The memory in userland is allowed to be fragmented
4999	   into various different vmas. It is okay to mmap() and munmap() stuff
5000	   in this slot after doing this call at any time */
5001
5002	if (mem->userspace_addr & 0xffffful)
5003		return -EINVAL;
5004
5005	if (mem->memory_size & 0xffffful)
5006		return -EINVAL;
5007
5008	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5009		return -EINVAL;
5010
5011	/* When we are protected, we should not change the memory slots */
5012	if (kvm_s390_pv_get_handle(kvm))
5013		return -EINVAL;
5014
5015	if (!kvm->arch.migration_mode)
5016		return 0;
5017
5018	/*
5019	 * Turn off migration mode when:
5020	 * - userspace creates a new memslot with dirty logging off,
5021	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5022	 *   dirty logging is turned off.
5023	 * Migration mode expects dirty page logging being enabled to store
5024	 * its dirty bitmap.
5025	 */
5026	if (change != KVM_MR_DELETE &&
5027	    !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5028		WARN(kvm_s390_vm_stop_migration(kvm),
5029		     "Failed to stop migration mode");
5030
5031	return 0;
5032}
5033
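/*
 * Update the guest address space (gmap) to match the memslot change that
 * was just committed by the generic memslot code.
 */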
5034void kvm_arch_commit_memory_region(struct kvm *kvm,
5035				const struct kvm_userspace_memory_region *mem,
5036				struct kvm_memory_slot *old,
5037				const struct kvm_memory_slot *new,
5038				enum kvm_mr_change change)
5039{
5040	int rc = 0;
5041
5042	switch (change) {
5043	case KVM_MR_DELETE:
5044		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5045					old->npages * PAGE_SIZE);
5046		break;
5047	case KVM_MR_MOVE:
5048		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5049					old->npages * PAGE_SIZE);
5050		if (rc)
5051			break;
5052		fallthrough;
5053	case KVM_MR_CREATE:
5054		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5055				      mem->guest_phys_addr, mem->memory_size);
5056		break;
5057	case KVM_MR_FLAGS_ONLY:
5058		break;
5059	default:
5060		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5061	}
5062	if (rc)
5063		pr_warn("failed to commit memory region\n");
5064	return;
5065}
5066
5067static inline unsigned long nonhyp_mask(int i)
5068{
5069	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5070
5071	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5072}
5073
5074void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5075{
5076	vcpu->valid_wakeup = false;
5077}
5078
5079static int __init kvm_s390_init(void)
5080{
5081	int i;
5082
5083	if (!sclp.has_sief2) {
5084		pr_info("SIE is not available\n");
5085		return -ENODEV;
5086	}
5087
5088	if (nested && hpage) {
5089		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5090		return -EINVAL;
5091	}
5092
5093	for (i = 0; i < 16; i++)
5094		kvm_s390_fac_base[i] |=
5095			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5096
5097	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5098}
5099
5100static void __exit kvm_s390_exit(void)
5101{
5102	kvm_exit();
5103}
5104
5105module_init(kvm_s390_init);
5106module_exit(kvm_s390_exit);
5107
5108/*
5109 * Enable autoloading of the kvm module.
5110 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5111 * since x86 takes a different approach.
5112 */
5113#include <linux/miscdevice.h>
5114MODULE_ALIAS_MISCDEV(KVM_MINOR);
5115MODULE_ALIAS("devname:kvm");
5116