18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright IBM Corporation, 2018
48c2ecf20Sopenharmony_ci * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
58c2ecf20Sopenharmony_ci *	   Paul Mackerras <paulus@ozlabs.org>
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Description: KVM functions specific to running nested KVM-HV guests
88c2ecf20Sopenharmony_ci * on Book3S processors (specifically POWER9 and later).
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <linux/kernel.h>
128c2ecf20Sopenharmony_ci#include <linux/kvm_host.h>
138c2ecf20Sopenharmony_ci#include <linux/llist.h>
148c2ecf20Sopenharmony_ci#include <linux/pgtable.h>
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <asm/kvm_ppc.h>
178c2ecf20Sopenharmony_ci#include <asm/kvm_book3s.h>
188c2ecf20Sopenharmony_ci#include <asm/mmu.h>
198c2ecf20Sopenharmony_ci#include <asm/pgalloc.h>
208c2ecf20Sopenharmony_ci#include <asm/pte-walk.h>
218c2ecf20Sopenharmony_ci#include <asm/reg.h>
228c2ecf20Sopenharmony_ci
/*
 * Partition table used when kvmhv_on_pseries() is true: allocated and
 * registered with the parent hypervisor in kvmhv_nested_init(), filled in
 * by kvmhv_set_ptbl_entry() and released in kvmhv_nested_exit().
 * NULL whenever no table is registered.
 */
static struct patb_entry *pseries_partition_tb;

/* Defined further down; needed before their definitions. */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_civoid kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	struct kvmppc_vcore *vc = vcpu->arch.vcore;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	hr->pcr = vc->pcr | PCR_MASK;
338c2ecf20Sopenharmony_ci	hr->dpdes = vc->dpdes;
348c2ecf20Sopenharmony_ci	hr->hfscr = vcpu->arch.hfscr;
358c2ecf20Sopenharmony_ci	hr->tb_offset = vc->tb_offset;
368c2ecf20Sopenharmony_ci	hr->dawr0 = vcpu->arch.dawr;
378c2ecf20Sopenharmony_ci	hr->dawrx0 = vcpu->arch.dawrx;
388c2ecf20Sopenharmony_ci	hr->ciabr = vcpu->arch.ciabr;
398c2ecf20Sopenharmony_ci	hr->purr = vcpu->arch.purr;
408c2ecf20Sopenharmony_ci	hr->spurr = vcpu->arch.spurr;
418c2ecf20Sopenharmony_ci	hr->ic = vcpu->arch.ic;
428c2ecf20Sopenharmony_ci	hr->vtb = vc->vtb;
438c2ecf20Sopenharmony_ci	hr->srr0 = vcpu->arch.shregs.srr0;
448c2ecf20Sopenharmony_ci	hr->srr1 = vcpu->arch.shregs.srr1;
458c2ecf20Sopenharmony_ci	hr->sprg[0] = vcpu->arch.shregs.sprg0;
468c2ecf20Sopenharmony_ci	hr->sprg[1] = vcpu->arch.shregs.sprg1;
478c2ecf20Sopenharmony_ci	hr->sprg[2] = vcpu->arch.shregs.sprg2;
488c2ecf20Sopenharmony_ci	hr->sprg[3] = vcpu->arch.shregs.sprg3;
498c2ecf20Sopenharmony_ci	hr->pidr = vcpu->arch.pid;
508c2ecf20Sopenharmony_ci	hr->cfar = vcpu->arch.cfar;
518c2ecf20Sopenharmony_ci	hr->ppr = vcpu->arch.ppr;
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
558c2ecf20Sopenharmony_cistatic noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	unsigned long *addr = (unsigned long *) regs;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	for (; addr < ((unsigned long *) (regs + 1)); addr++)
608c2ecf20Sopenharmony_ci		*addr = swab64(*addr);
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_cistatic void byteswap_hv_regs(struct hv_guest_state *hr)
648c2ecf20Sopenharmony_ci{
658c2ecf20Sopenharmony_ci	hr->version = swab64(hr->version);
668c2ecf20Sopenharmony_ci	hr->lpid = swab32(hr->lpid);
678c2ecf20Sopenharmony_ci	hr->vcpu_token = swab32(hr->vcpu_token);
688c2ecf20Sopenharmony_ci	hr->lpcr = swab64(hr->lpcr);
698c2ecf20Sopenharmony_ci	hr->pcr = swab64(hr->pcr) | PCR_MASK;
708c2ecf20Sopenharmony_ci	hr->amor = swab64(hr->amor);
718c2ecf20Sopenharmony_ci	hr->dpdes = swab64(hr->dpdes);
728c2ecf20Sopenharmony_ci	hr->hfscr = swab64(hr->hfscr);
738c2ecf20Sopenharmony_ci	hr->tb_offset = swab64(hr->tb_offset);
748c2ecf20Sopenharmony_ci	hr->dawr0 = swab64(hr->dawr0);
758c2ecf20Sopenharmony_ci	hr->dawrx0 = swab64(hr->dawrx0);
768c2ecf20Sopenharmony_ci	hr->ciabr = swab64(hr->ciabr);
778c2ecf20Sopenharmony_ci	hr->hdec_expiry = swab64(hr->hdec_expiry);
788c2ecf20Sopenharmony_ci	hr->purr = swab64(hr->purr);
798c2ecf20Sopenharmony_ci	hr->spurr = swab64(hr->spurr);
808c2ecf20Sopenharmony_ci	hr->ic = swab64(hr->ic);
818c2ecf20Sopenharmony_ci	hr->vtb = swab64(hr->vtb);
828c2ecf20Sopenharmony_ci	hr->hdar = swab64(hr->hdar);
838c2ecf20Sopenharmony_ci	hr->hdsisr = swab64(hr->hdsisr);
848c2ecf20Sopenharmony_ci	hr->heir = swab64(hr->heir);
858c2ecf20Sopenharmony_ci	hr->asdr = swab64(hr->asdr);
868c2ecf20Sopenharmony_ci	hr->srr0 = swab64(hr->srr0);
878c2ecf20Sopenharmony_ci	hr->srr1 = swab64(hr->srr1);
888c2ecf20Sopenharmony_ci	hr->sprg[0] = swab64(hr->sprg[0]);
898c2ecf20Sopenharmony_ci	hr->sprg[1] = swab64(hr->sprg[1]);
908c2ecf20Sopenharmony_ci	hr->sprg[2] = swab64(hr->sprg[2]);
918c2ecf20Sopenharmony_ci	hr->sprg[3] = swab64(hr->sprg[3]);
928c2ecf20Sopenharmony_ci	hr->pidr = swab64(hr->pidr);
938c2ecf20Sopenharmony_ci	hr->cfar = swab64(hr->cfar);
948c2ecf20Sopenharmony_ci	hr->ppr = swab64(hr->ppr);
958c2ecf20Sopenharmony_ci}
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_cistatic void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
988c2ecf20Sopenharmony_ci				 struct hv_guest_state *hr)
998c2ecf20Sopenharmony_ci{
1008c2ecf20Sopenharmony_ci	struct kvmppc_vcore *vc = vcpu->arch.vcore;
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	hr->dpdes = vc->dpdes;
1038c2ecf20Sopenharmony_ci	hr->hfscr = vcpu->arch.hfscr;
1048c2ecf20Sopenharmony_ci	hr->purr = vcpu->arch.purr;
1058c2ecf20Sopenharmony_ci	hr->spurr = vcpu->arch.spurr;
1068c2ecf20Sopenharmony_ci	hr->ic = vcpu->arch.ic;
1078c2ecf20Sopenharmony_ci	hr->vtb = vc->vtb;
1088c2ecf20Sopenharmony_ci	hr->srr0 = vcpu->arch.shregs.srr0;
1098c2ecf20Sopenharmony_ci	hr->srr1 = vcpu->arch.shregs.srr1;
1108c2ecf20Sopenharmony_ci	hr->sprg[0] = vcpu->arch.shregs.sprg0;
1118c2ecf20Sopenharmony_ci	hr->sprg[1] = vcpu->arch.shregs.sprg1;
1128c2ecf20Sopenharmony_ci	hr->sprg[2] = vcpu->arch.shregs.sprg2;
1138c2ecf20Sopenharmony_ci	hr->sprg[3] = vcpu->arch.shregs.sprg3;
1148c2ecf20Sopenharmony_ci	hr->pidr = vcpu->arch.pid;
1158c2ecf20Sopenharmony_ci	hr->cfar = vcpu->arch.cfar;
1168c2ecf20Sopenharmony_ci	hr->ppr = vcpu->arch.ppr;
1178c2ecf20Sopenharmony_ci	switch (trap) {
1188c2ecf20Sopenharmony_ci	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1198c2ecf20Sopenharmony_ci		hr->hdar = vcpu->arch.fault_dar;
1208c2ecf20Sopenharmony_ci		hr->hdsisr = vcpu->arch.fault_dsisr;
1218c2ecf20Sopenharmony_ci		hr->asdr = vcpu->arch.fault_gpa;
1228c2ecf20Sopenharmony_ci		break;
1238c2ecf20Sopenharmony_ci	case BOOK3S_INTERRUPT_H_INST_STORAGE:
1248c2ecf20Sopenharmony_ci		hr->asdr = vcpu->arch.fault_gpa;
1258c2ecf20Sopenharmony_ci		break;
1268c2ecf20Sopenharmony_ci	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
1278c2ecf20Sopenharmony_ci		hr->heir = vcpu->arch.emul_inst;
1288c2ecf20Sopenharmony_ci		break;
1298c2ecf20Sopenharmony_ci	}
1308c2ecf20Sopenharmony_ci}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_cistatic void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
1338c2ecf20Sopenharmony_ci{
1348c2ecf20Sopenharmony_ci	/*
1358c2ecf20Sopenharmony_ci	 * Don't let L1 enable features for L2 which we've disabled for L1,
1368c2ecf20Sopenharmony_ci	 * but preserve the interrupt cause field.
1378c2ecf20Sopenharmony_ci	 */
1388c2ecf20Sopenharmony_ci	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	/* Don't let data address watchpoint match in hypervisor state */
1418c2ecf20Sopenharmony_ci	hr->dawrx0 &= ~DAWRX_HYP;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	/* Don't let completed instruction address breakpt match in HV state */
1448c2ecf20Sopenharmony_ci	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
1458c2ecf20Sopenharmony_ci		hr->ciabr &= ~CIABR_PRIV;
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	struct kvmppc_vcore *vc = vcpu->arch.vcore;
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	vc->pcr = hr->pcr | PCR_MASK;
1538c2ecf20Sopenharmony_ci	vc->dpdes = hr->dpdes;
1548c2ecf20Sopenharmony_ci	vcpu->arch.hfscr = hr->hfscr;
1558c2ecf20Sopenharmony_ci	vcpu->arch.dawr = hr->dawr0;
1568c2ecf20Sopenharmony_ci	vcpu->arch.dawrx = hr->dawrx0;
1578c2ecf20Sopenharmony_ci	vcpu->arch.ciabr = hr->ciabr;
1588c2ecf20Sopenharmony_ci	vcpu->arch.purr = hr->purr;
1598c2ecf20Sopenharmony_ci	vcpu->arch.spurr = hr->spurr;
1608c2ecf20Sopenharmony_ci	vcpu->arch.ic = hr->ic;
1618c2ecf20Sopenharmony_ci	vc->vtb = hr->vtb;
1628c2ecf20Sopenharmony_ci	vcpu->arch.shregs.srr0 = hr->srr0;
1638c2ecf20Sopenharmony_ci	vcpu->arch.shregs.srr1 = hr->srr1;
1648c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg0 = hr->sprg[0];
1658c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg1 = hr->sprg[1];
1668c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg2 = hr->sprg[2];
1678c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg3 = hr->sprg[3];
1688c2ecf20Sopenharmony_ci	vcpu->arch.pid = hr->pidr;
1698c2ecf20Sopenharmony_ci	vcpu->arch.cfar = hr->cfar;
1708c2ecf20Sopenharmony_ci	vcpu->arch.ppr = hr->ppr;
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_civoid kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
1748c2ecf20Sopenharmony_ci				   struct hv_guest_state *hr)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	struct kvmppc_vcore *vc = vcpu->arch.vcore;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	vc->dpdes = hr->dpdes;
1798c2ecf20Sopenharmony_ci	vcpu->arch.hfscr = hr->hfscr;
1808c2ecf20Sopenharmony_ci	vcpu->arch.purr = hr->purr;
1818c2ecf20Sopenharmony_ci	vcpu->arch.spurr = hr->spurr;
1828c2ecf20Sopenharmony_ci	vcpu->arch.ic = hr->ic;
1838c2ecf20Sopenharmony_ci	vc->vtb = hr->vtb;
1848c2ecf20Sopenharmony_ci	vcpu->arch.fault_dar = hr->hdar;
1858c2ecf20Sopenharmony_ci	vcpu->arch.fault_dsisr = hr->hdsisr;
1868c2ecf20Sopenharmony_ci	vcpu->arch.fault_gpa = hr->asdr;
1878c2ecf20Sopenharmony_ci	vcpu->arch.emul_inst = hr->heir;
1888c2ecf20Sopenharmony_ci	vcpu->arch.shregs.srr0 = hr->srr0;
1898c2ecf20Sopenharmony_ci	vcpu->arch.shregs.srr1 = hr->srr1;
1908c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg0 = hr->sprg[0];
1918c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg1 = hr->sprg[1];
1928c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg2 = hr->sprg[2];
1938c2ecf20Sopenharmony_ci	vcpu->arch.shregs.sprg3 = hr->sprg[3];
1948c2ecf20Sopenharmony_ci	vcpu->arch.pid = hr->pidr;
1958c2ecf20Sopenharmony_ci	vcpu->arch.cfar = hr->cfar;
1968c2ecf20Sopenharmony_ci	vcpu->arch.ppr = hr->ppr;
1978c2ecf20Sopenharmony_ci}
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_cistatic void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	/* No need to reflect the page fault to L1, we've handled it */
2028c2ecf20Sopenharmony_ci	vcpu->arch.trap = 0;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	/*
2058c2ecf20Sopenharmony_ci	 * Since the L2 gprs have already been written back into L1 memory when
2068c2ecf20Sopenharmony_ci	 * we complete the mmio, store the L1 memory location of the L2 gpr
2078c2ecf20Sopenharmony_ci	 * being loaded into by the mmio so that the loaded value can be
2088c2ecf20Sopenharmony_ci	 * written there in kvmppc_complete_mmio_load()
2098c2ecf20Sopenharmony_ci	 */
2108c2ecf20Sopenharmony_ci	if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
2118c2ecf20Sopenharmony_ci	    && (vcpu->mmio_is_write == 0)) {
2128c2ecf20Sopenharmony_ci		vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
2138c2ecf20Sopenharmony_ci					   offsetof(struct pt_regs,
2148c2ecf20Sopenharmony_ci						    gpr[vcpu->arch.io_gpr]);
2158c2ecf20Sopenharmony_ci		vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_cilong kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
2208c2ecf20Sopenharmony_ci{
2218c2ecf20Sopenharmony_ci	long int err, r;
2228c2ecf20Sopenharmony_ci	struct kvm_nested_guest *l2;
2238c2ecf20Sopenharmony_ci	struct pt_regs l2_regs, saved_l1_regs;
2248c2ecf20Sopenharmony_ci	struct hv_guest_state l2_hv, saved_l1_hv;
2258c2ecf20Sopenharmony_ci	struct kvmppc_vcore *vc = vcpu->arch.vcore;
2268c2ecf20Sopenharmony_ci	u64 hv_ptr, regs_ptr;
2278c2ecf20Sopenharmony_ci	u64 hdec_exp;
2288c2ecf20Sopenharmony_ci	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
2298c2ecf20Sopenharmony_ci	u64 mask;
2308c2ecf20Sopenharmony_ci	unsigned long lpcr;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	if (vcpu->kvm->arch.l1_ptcr == 0)
2338c2ecf20Sopenharmony_ci		return H_NOT_AVAILABLE;
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
2368c2ecf20Sopenharmony_ci		return H_BAD_MODE;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	/* copy parameters in */
2398c2ecf20Sopenharmony_ci	hv_ptr = kvmppc_get_gpr(vcpu, 4);
2408c2ecf20Sopenharmony_ci	regs_ptr = kvmppc_get_gpr(vcpu, 5);
2418c2ecf20Sopenharmony_ci	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2428c2ecf20Sopenharmony_ci	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
2438c2ecf20Sopenharmony_ci				  sizeof(struct hv_guest_state)) ||
2448c2ecf20Sopenharmony_ci		kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
2458c2ecf20Sopenharmony_ci				    sizeof(struct pt_regs));
2468c2ecf20Sopenharmony_ci	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2478c2ecf20Sopenharmony_ci	if (err)
2488c2ecf20Sopenharmony_ci		return H_PARAMETER;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	if (kvmppc_need_byteswap(vcpu))
2518c2ecf20Sopenharmony_ci		byteswap_hv_regs(&l2_hv);
2528c2ecf20Sopenharmony_ci	if (l2_hv.version != HV_GUEST_STATE_VERSION)
2538c2ecf20Sopenharmony_ci		return H_P2;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	if (kvmppc_need_byteswap(vcpu))
2568c2ecf20Sopenharmony_ci		byteswap_pt_regs(&l2_regs);
2578c2ecf20Sopenharmony_ci	if (l2_hv.vcpu_token >= NR_CPUS)
2588c2ecf20Sopenharmony_ci		return H_PARAMETER;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	/*
2618c2ecf20Sopenharmony_ci	 * L1 must have set up a suspended state to enter the L2 in a
2628c2ecf20Sopenharmony_ci	 * transactional state, and only in that case. These have to be
2638c2ecf20Sopenharmony_ci	 * filtered out here to prevent causing a TM Bad Thing in the
2648c2ecf20Sopenharmony_ci	 * host HRFID. We could synthesize a TM Bad Thing back to the L1
2658c2ecf20Sopenharmony_ci	 * here but there doesn't seem like much point.
2668c2ecf20Sopenharmony_ci	 */
2678c2ecf20Sopenharmony_ci	if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
2688c2ecf20Sopenharmony_ci		if (!MSR_TM_ACTIVE(l2_regs.msr))
2698c2ecf20Sopenharmony_ci			return H_BAD_MODE;
2708c2ecf20Sopenharmony_ci	} else {
2718c2ecf20Sopenharmony_ci		if (l2_regs.msr & MSR_TS_MASK)
2728c2ecf20Sopenharmony_ci			return H_BAD_MODE;
2738c2ecf20Sopenharmony_ci		if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
2748c2ecf20Sopenharmony_ci			return H_BAD_MODE;
2758c2ecf20Sopenharmony_ci	}
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	/* translate lpid */
2788c2ecf20Sopenharmony_ci	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
2798c2ecf20Sopenharmony_ci	if (!l2)
2808c2ecf20Sopenharmony_ci		return H_PARAMETER;
2818c2ecf20Sopenharmony_ci	if (!l2->l1_gr_to_hr) {
2828c2ecf20Sopenharmony_ci		mutex_lock(&l2->tlb_lock);
2838c2ecf20Sopenharmony_ci		kvmhv_update_ptbl_cache(l2);
2848c2ecf20Sopenharmony_ci		mutex_unlock(&l2->tlb_lock);
2858c2ecf20Sopenharmony_ci	}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	/* save l1 values of things */
2888c2ecf20Sopenharmony_ci	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
2898c2ecf20Sopenharmony_ci	saved_l1_regs = vcpu->arch.regs;
2908c2ecf20Sopenharmony_ci	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	/* convert TB values/offsets to host (L0) values */
2938c2ecf20Sopenharmony_ci	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
2948c2ecf20Sopenharmony_ci	vc->tb_offset += l2_hv.tb_offset;
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	/* set L1 state to L2 state */
2978c2ecf20Sopenharmony_ci	vcpu->arch.nested = l2;
2988c2ecf20Sopenharmony_ci	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
2998c2ecf20Sopenharmony_ci	vcpu->arch.regs = l2_regs;
3008c2ecf20Sopenharmony_ci	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
3018c2ecf20Sopenharmony_ci	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
3028c2ecf20Sopenharmony_ci		LPCR_LPES | LPCR_MER;
3038c2ecf20Sopenharmony_ci	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
3048c2ecf20Sopenharmony_ci	sanitise_hv_regs(vcpu, &l2_hv);
3058c2ecf20Sopenharmony_ci	restore_hv_regs(vcpu, &l2_hv);
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_ci	vcpu->arch.ret = RESUME_GUEST;
3088c2ecf20Sopenharmony_ci	vcpu->arch.trap = 0;
3098c2ecf20Sopenharmony_ci	do {
3108c2ecf20Sopenharmony_ci		if (mftb() >= hdec_exp) {
3118c2ecf20Sopenharmony_ci			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
3128c2ecf20Sopenharmony_ci			r = RESUME_HOST;
3138c2ecf20Sopenharmony_ci			break;
3148c2ecf20Sopenharmony_ci		}
3158c2ecf20Sopenharmony_ci		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
3168c2ecf20Sopenharmony_ci	} while (is_kvmppc_resume_guest(r));
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	/* save L2 state for return */
3198c2ecf20Sopenharmony_ci	l2_regs = vcpu->arch.regs;
3208c2ecf20Sopenharmony_ci	l2_regs.msr = vcpu->arch.shregs.msr;
3218c2ecf20Sopenharmony_ci	delta_purr = vcpu->arch.purr - l2_hv.purr;
3228c2ecf20Sopenharmony_ci	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
3238c2ecf20Sopenharmony_ci	delta_ic = vcpu->arch.ic - l2_hv.ic;
3248c2ecf20Sopenharmony_ci	delta_vtb = vc->vtb - l2_hv.vtb;
3258c2ecf20Sopenharmony_ci	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	/* restore L1 state */
3288c2ecf20Sopenharmony_ci	vcpu->arch.nested = NULL;
3298c2ecf20Sopenharmony_ci	vcpu->arch.regs = saved_l1_regs;
3308c2ecf20Sopenharmony_ci	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
3318c2ecf20Sopenharmony_ci	/* set L1 MSR TS field according to L2 transaction state */
3328c2ecf20Sopenharmony_ci	if (l2_regs.msr & MSR_TS_MASK)
3338c2ecf20Sopenharmony_ci		vcpu->arch.shregs.msr |= MSR_TS_S;
3348c2ecf20Sopenharmony_ci	vc->tb_offset = saved_l1_hv.tb_offset;
3358c2ecf20Sopenharmony_ci	restore_hv_regs(vcpu, &saved_l1_hv);
3368c2ecf20Sopenharmony_ci	vcpu->arch.purr += delta_purr;
3378c2ecf20Sopenharmony_ci	vcpu->arch.spurr += delta_spurr;
3388c2ecf20Sopenharmony_ci	vcpu->arch.ic += delta_ic;
3398c2ecf20Sopenharmony_ci	vc->vtb += delta_vtb;
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	kvmhv_put_nested(l2);
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	/* copy l2_hv_state and regs back to guest */
3448c2ecf20Sopenharmony_ci	if (kvmppc_need_byteswap(vcpu)) {
3458c2ecf20Sopenharmony_ci		byteswap_hv_regs(&l2_hv);
3468c2ecf20Sopenharmony_ci		byteswap_pt_regs(&l2_regs);
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3498c2ecf20Sopenharmony_ci	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
3508c2ecf20Sopenharmony_ci				   sizeof(struct hv_guest_state)) ||
3518c2ecf20Sopenharmony_ci		kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
3528c2ecf20Sopenharmony_ci				   sizeof(struct pt_regs));
3538c2ecf20Sopenharmony_ci	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3548c2ecf20Sopenharmony_ci	if (err)
3558c2ecf20Sopenharmony_ci		return H_AUTHORITY;
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci	if (r == -EINTR)
3588c2ecf20Sopenharmony_ci		return H_INTERRUPT;
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci	if (vcpu->mmio_needed) {
3618c2ecf20Sopenharmony_ci		kvmhv_nested_mmio_needed(vcpu, regs_ptr);
3628c2ecf20Sopenharmony_ci		return H_TOO_HARD;
3638c2ecf20Sopenharmony_ci	}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci	return vcpu->arch.trap;
3668c2ecf20Sopenharmony_ci}
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_cilong kvmhv_nested_init(void)
3698c2ecf20Sopenharmony_ci{
3708c2ecf20Sopenharmony_ci	long int ptb_order;
3718c2ecf20Sopenharmony_ci	unsigned long ptcr;
3728c2ecf20Sopenharmony_ci	long rc;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (!kvmhv_on_pseries())
3758c2ecf20Sopenharmony_ci		return 0;
3768c2ecf20Sopenharmony_ci	if (!radix_enabled())
3778c2ecf20Sopenharmony_ci		return -ENODEV;
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
3808c2ecf20Sopenharmony_ci	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
3818c2ecf20Sopenharmony_ci	if (ptb_order < 8)
3828c2ecf20Sopenharmony_ci		ptb_order = 8;
3838c2ecf20Sopenharmony_ci	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
3848c2ecf20Sopenharmony_ci				       GFP_KERNEL);
3858c2ecf20Sopenharmony_ci	if (!pseries_partition_tb) {
3868c2ecf20Sopenharmony_ci		pr_err("kvm-hv: failed to allocated nested partition table\n");
3878c2ecf20Sopenharmony_ci		return -ENOMEM;
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
3918c2ecf20Sopenharmony_ci	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
3928c2ecf20Sopenharmony_ci	if (rc != H_SUCCESS) {
3938c2ecf20Sopenharmony_ci		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
3948c2ecf20Sopenharmony_ci		       rc);
3958c2ecf20Sopenharmony_ci		kfree(pseries_partition_tb);
3968c2ecf20Sopenharmony_ci		pseries_partition_tb = NULL;
3978c2ecf20Sopenharmony_ci		return -ENODEV;
3988c2ecf20Sopenharmony_ci	}
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	return 0;
4018c2ecf20Sopenharmony_ci}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_civoid kvmhv_nested_exit(void)
4048c2ecf20Sopenharmony_ci{
4058c2ecf20Sopenharmony_ci	/*
4068c2ecf20Sopenharmony_ci	 * N.B. the kvmhv_on_pseries() test is there because it enables
4078c2ecf20Sopenharmony_ci	 * the compiler to remove the call to plpar_hcall_norets()
4088c2ecf20Sopenharmony_ci	 * when CONFIG_PPC_PSERIES=n.
4098c2ecf20Sopenharmony_ci	 */
4108c2ecf20Sopenharmony_ci	if (kvmhv_on_pseries() && pseries_partition_tb) {
4118c2ecf20Sopenharmony_ci		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
4128c2ecf20Sopenharmony_ci		kfree(pseries_partition_tb);
4138c2ecf20Sopenharmony_ci		pseries_partition_tb = NULL;
4148c2ecf20Sopenharmony_ci	}
4158c2ecf20Sopenharmony_ci}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_cistatic void kvmhv_flush_lpid(unsigned int lpid)
4188c2ecf20Sopenharmony_ci{
4198c2ecf20Sopenharmony_ci	long rc;
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	if (!kvmhv_on_pseries()) {
4228c2ecf20Sopenharmony_ci		radix__flush_all_lpid(lpid);
4238c2ecf20Sopenharmony_ci		return;
4248c2ecf20Sopenharmony_ci	}
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
4278c2ecf20Sopenharmony_ci				lpid, TLBIEL_INVAL_SET_LPID);
4288c2ecf20Sopenharmony_ci	if (rc)
4298c2ecf20Sopenharmony_ci		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_civoid kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
4338c2ecf20Sopenharmony_ci{
4348c2ecf20Sopenharmony_ci	if (!kvmhv_on_pseries()) {
4358c2ecf20Sopenharmony_ci		mmu_partition_table_set_entry(lpid, dw0, dw1, true);
4368c2ecf20Sopenharmony_ci		return;
4378c2ecf20Sopenharmony_ci	}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
4408c2ecf20Sopenharmony_ci	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
4418c2ecf20Sopenharmony_ci	/* L0 will do the necessary barriers */
4428c2ecf20Sopenharmony_ci	kvmhv_flush_lpid(lpid);
4438c2ecf20Sopenharmony_ci}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_cistatic void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
4468c2ecf20Sopenharmony_ci{
4478c2ecf20Sopenharmony_ci	unsigned long dw0;
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	dw0 = PATB_HR | radix__get_tree_size() |
4508c2ecf20Sopenharmony_ci		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
4518c2ecf20Sopenharmony_ci	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
4528c2ecf20Sopenharmony_ci}
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_civoid kvmhv_vm_nested_init(struct kvm *kvm)
4558c2ecf20Sopenharmony_ci{
4568c2ecf20Sopenharmony_ci	kvm->arch.max_nested_lpid = -1;
4578c2ecf20Sopenharmony_ci}
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci/*
4608c2ecf20Sopenharmony_ci * Handle the H_SET_PARTITION_TABLE hcall.
4618c2ecf20Sopenharmony_ci * r4 = guest real address of partition table + log_2(size) - 12
4628c2ecf20Sopenharmony_ci * (formatted as for the PTCR).
4638c2ecf20Sopenharmony_ci */
4648c2ecf20Sopenharmony_cilong kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
4658c2ecf20Sopenharmony_ci{
4668c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
4678c2ecf20Sopenharmony_ci	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
4688c2ecf20Sopenharmony_ci	int srcu_idx;
4698c2ecf20Sopenharmony_ci	long ret = H_SUCCESS;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	srcu_idx = srcu_read_lock(&kvm->srcu);
4728c2ecf20Sopenharmony_ci	/*
4738c2ecf20Sopenharmony_ci	 * Limit the partition table to 4096 entries (because that's what
4748c2ecf20Sopenharmony_ci	 * hardware supports), and check the base address.
4758c2ecf20Sopenharmony_ci	 */
4768c2ecf20Sopenharmony_ci	if ((ptcr & PRTS_MASK) > 12 - 8 ||
4778c2ecf20Sopenharmony_ci	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
4788c2ecf20Sopenharmony_ci		ret = H_PARAMETER;
4798c2ecf20Sopenharmony_ci	srcu_read_unlock(&kvm->srcu, srcu_idx);
4808c2ecf20Sopenharmony_ci	if (ret == H_SUCCESS)
4818c2ecf20Sopenharmony_ci		kvm->arch.l1_ptcr = ptcr;
4828c2ecf20Sopenharmony_ci	return ret;
4838c2ecf20Sopenharmony_ci}
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci/*
4868c2ecf20Sopenharmony_ci * Handle the H_COPY_TOFROM_GUEST hcall.
4878c2ecf20Sopenharmony_ci * r4 = L1 lpid of nested guest
4888c2ecf20Sopenharmony_ci * r5 = pid
4898c2ecf20Sopenharmony_ci * r6 = eaddr to access
4908c2ecf20Sopenharmony_ci * r7 = to buffer (L1 gpa)
4918c2ecf20Sopenharmony_ci * r8 = from buffer (L1 gpa)
4928c2ecf20Sopenharmony_ci * r9 = n bytes to copy
4938c2ecf20Sopenharmony_ci */
4948c2ecf20Sopenharmony_cilong kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
4958c2ecf20Sopenharmony_ci{
4968c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
4978c2ecf20Sopenharmony_ci	int l1_lpid = kvmppc_get_gpr(vcpu, 4);
4988c2ecf20Sopenharmony_ci	int pid = kvmppc_get_gpr(vcpu, 5);
4998c2ecf20Sopenharmony_ci	gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
5008c2ecf20Sopenharmony_ci	gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
5018c2ecf20Sopenharmony_ci	gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
5028c2ecf20Sopenharmony_ci	void *buf;
5038c2ecf20Sopenharmony_ci	unsigned long n = kvmppc_get_gpr(vcpu, 9);
5048c2ecf20Sopenharmony_ci	bool is_load = !!gp_to;
5058c2ecf20Sopenharmony_ci	long rc;
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	if (gp_to && gp_from) /* One must be NULL to determine the direction */
5088c2ecf20Sopenharmony_ci		return H_PARAMETER;
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	if (eaddr & (0xFFFUL << 52))
5118c2ecf20Sopenharmony_ci		return H_PARAMETER;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
5148c2ecf20Sopenharmony_ci	if (!buf)
5158c2ecf20Sopenharmony_ci		return H_NO_MEM;
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
5188c2ecf20Sopenharmony_ci	if (!gp) {
5198c2ecf20Sopenharmony_ci		rc = H_PARAMETER;
5208c2ecf20Sopenharmony_ci		goto out_free;
5218c2ecf20Sopenharmony_ci	}
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	mutex_lock(&gp->tlb_lock);
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	if (is_load) {
5268c2ecf20Sopenharmony_ci		/* Load from the nested guest into our buffer */
5278c2ecf20Sopenharmony_ci		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
5288c2ecf20Sopenharmony_ci						     eaddr, buf, NULL, n);
5298c2ecf20Sopenharmony_ci		if (rc)
5308c2ecf20Sopenharmony_ci			goto not_found;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci		/* Write what was loaded into our buffer back to the L1 guest */
5338c2ecf20Sopenharmony_ci		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5348c2ecf20Sopenharmony_ci		rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
5358c2ecf20Sopenharmony_ci		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5368c2ecf20Sopenharmony_ci		if (rc)
5378c2ecf20Sopenharmony_ci			goto not_found;
5388c2ecf20Sopenharmony_ci	} else {
5398c2ecf20Sopenharmony_ci		/* Load the data to be stored from the L1 guest into our buf */
5408c2ecf20Sopenharmony_ci		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5418c2ecf20Sopenharmony_ci		rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
5428c2ecf20Sopenharmony_ci		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5438c2ecf20Sopenharmony_ci		if (rc)
5448c2ecf20Sopenharmony_ci			goto not_found;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci		/* Store from our buffer into the nested guest */
5478c2ecf20Sopenharmony_ci		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
5488c2ecf20Sopenharmony_ci						     eaddr, NULL, buf, n);
5498c2ecf20Sopenharmony_ci		if (rc)
5508c2ecf20Sopenharmony_ci			goto not_found;
5518c2ecf20Sopenharmony_ci	}
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ciout_unlock:
5548c2ecf20Sopenharmony_ci	mutex_unlock(&gp->tlb_lock);
5558c2ecf20Sopenharmony_ci	kvmhv_put_nested(gp);
5568c2ecf20Sopenharmony_ciout_free:
5578c2ecf20Sopenharmony_ci	kfree(buf);
5588c2ecf20Sopenharmony_ci	return rc;
5598c2ecf20Sopenharmony_cinot_found:
5608c2ecf20Sopenharmony_ci	rc = H_NOT_FOUND;
5618c2ecf20Sopenharmony_ci	goto out_unlock;
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci/*
5658c2ecf20Sopenharmony_ci * Reload the partition table entry for a guest.
5668c2ecf20Sopenharmony_ci * Caller must hold gp->tlb_lock.
5678c2ecf20Sopenharmony_ci */
5688c2ecf20Sopenharmony_cistatic void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
5698c2ecf20Sopenharmony_ci{
5708c2ecf20Sopenharmony_ci	int ret;
5718c2ecf20Sopenharmony_ci	struct patb_entry ptbl_entry;
5728c2ecf20Sopenharmony_ci	unsigned long ptbl_addr;
5738c2ecf20Sopenharmony_ci	struct kvm *kvm = gp->l1_host;
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci	ret = -EFAULT;
5768c2ecf20Sopenharmony_ci	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
5778c2ecf20Sopenharmony_ci	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
5788c2ecf20Sopenharmony_ci		int srcu_idx = srcu_read_lock(&kvm->srcu);
5798c2ecf20Sopenharmony_ci		ret = kvm_read_guest(kvm, ptbl_addr,
5808c2ecf20Sopenharmony_ci				     &ptbl_entry, sizeof(ptbl_entry));
5818c2ecf20Sopenharmony_ci		srcu_read_unlock(&kvm->srcu, srcu_idx);
5828c2ecf20Sopenharmony_ci	}
5838c2ecf20Sopenharmony_ci	if (ret) {
5848c2ecf20Sopenharmony_ci		gp->l1_gr_to_hr = 0;
5858c2ecf20Sopenharmony_ci		gp->process_table = 0;
5868c2ecf20Sopenharmony_ci	} else {
5878c2ecf20Sopenharmony_ci		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
5888c2ecf20Sopenharmony_ci		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
5898c2ecf20Sopenharmony_ci	}
5908c2ecf20Sopenharmony_ci	kvmhv_set_nested_ptbl(gp);
5918c2ecf20Sopenharmony_ci}
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_cistatic struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
5948c2ecf20Sopenharmony_ci{
5958c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
5968c2ecf20Sopenharmony_ci	long shadow_lpid;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
5998c2ecf20Sopenharmony_ci	if (!gp)
6008c2ecf20Sopenharmony_ci		return NULL;
6018c2ecf20Sopenharmony_ci	gp->l1_host = kvm;
6028c2ecf20Sopenharmony_ci	gp->l1_lpid = lpid;
6038c2ecf20Sopenharmony_ci	mutex_init(&gp->tlb_lock);
6048c2ecf20Sopenharmony_ci	gp->shadow_pgtable = pgd_alloc(kvm->mm);
6058c2ecf20Sopenharmony_ci	if (!gp->shadow_pgtable)
6068c2ecf20Sopenharmony_ci		goto out_free;
6078c2ecf20Sopenharmony_ci	shadow_lpid = kvmppc_alloc_lpid();
6088c2ecf20Sopenharmony_ci	if (shadow_lpid < 0)
6098c2ecf20Sopenharmony_ci		goto out_free2;
6108c2ecf20Sopenharmony_ci	gp->shadow_lpid = shadow_lpid;
6118c2ecf20Sopenharmony_ci	gp->radix = 1;
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	return gp;
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci out_free2:
6188c2ecf20Sopenharmony_ci	pgd_free(kvm->mm, gp->shadow_pgtable);
6198c2ecf20Sopenharmony_ci out_free:
6208c2ecf20Sopenharmony_ci	kfree(gp);
6218c2ecf20Sopenharmony_ci	return NULL;
6228c2ecf20Sopenharmony_ci}
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci/*
6258c2ecf20Sopenharmony_ci * Free up any resources allocated for a nested guest.
6268c2ecf20Sopenharmony_ci */
6278c2ecf20Sopenharmony_cistatic void kvmhv_release_nested(struct kvm_nested_guest *gp)
6288c2ecf20Sopenharmony_ci{
6298c2ecf20Sopenharmony_ci	struct kvm *kvm = gp->l1_host;
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	if (gp->shadow_pgtable) {
6328c2ecf20Sopenharmony_ci		/*
6338c2ecf20Sopenharmony_ci		 * No vcpu is using this struct and no call to
6348c2ecf20Sopenharmony_ci		 * kvmhv_get_nested can find this struct,
6358c2ecf20Sopenharmony_ci		 * so we don't need to hold kvm->mmu_lock.
6368c2ecf20Sopenharmony_ci		 */
6378c2ecf20Sopenharmony_ci		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
6388c2ecf20Sopenharmony_ci					  gp->shadow_lpid);
6398c2ecf20Sopenharmony_ci		pgd_free(kvm->mm, gp->shadow_pgtable);
6408c2ecf20Sopenharmony_ci	}
6418c2ecf20Sopenharmony_ci	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
6428c2ecf20Sopenharmony_ci	kvmppc_free_lpid(gp->shadow_lpid);
6438c2ecf20Sopenharmony_ci	kfree(gp);
6448c2ecf20Sopenharmony_ci}
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_cistatic void kvmhv_remove_nested(struct kvm_nested_guest *gp)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	struct kvm *kvm = gp->l1_host;
6498c2ecf20Sopenharmony_ci	int lpid = gp->l1_lpid;
6508c2ecf20Sopenharmony_ci	long ref;
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
6538c2ecf20Sopenharmony_ci	if (gp == kvm->arch.nested_guests[lpid]) {
6548c2ecf20Sopenharmony_ci		kvm->arch.nested_guests[lpid] = NULL;
6558c2ecf20Sopenharmony_ci		if (lpid == kvm->arch.max_nested_lpid) {
6568c2ecf20Sopenharmony_ci			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
6578c2ecf20Sopenharmony_ci				;
6588c2ecf20Sopenharmony_ci			kvm->arch.max_nested_lpid = lpid;
6598c2ecf20Sopenharmony_ci		}
6608c2ecf20Sopenharmony_ci		--gp->refcnt;
6618c2ecf20Sopenharmony_ci	}
6628c2ecf20Sopenharmony_ci	ref = gp->refcnt;
6638c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
6648c2ecf20Sopenharmony_ci	if (ref == 0)
6658c2ecf20Sopenharmony_ci		kvmhv_release_nested(gp);
6668c2ecf20Sopenharmony_ci}
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci/*
6698c2ecf20Sopenharmony_ci * Free up all nested resources allocated for this guest.
6708c2ecf20Sopenharmony_ci * This is called with no vcpus of the guest running, when
6718c2ecf20Sopenharmony_ci * switching the guest to HPT mode or when destroying the
6728c2ecf20Sopenharmony_ci * guest.
6738c2ecf20Sopenharmony_ci */
6748c2ecf20Sopenharmony_civoid kvmhv_release_all_nested(struct kvm *kvm)
6758c2ecf20Sopenharmony_ci{
6768c2ecf20Sopenharmony_ci	int i;
6778c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
6788c2ecf20Sopenharmony_ci	struct kvm_nested_guest *freelist = NULL;
6798c2ecf20Sopenharmony_ci	struct kvm_memory_slot *memslot;
6808c2ecf20Sopenharmony_ci	int srcu_idx;
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
6838c2ecf20Sopenharmony_ci	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
6848c2ecf20Sopenharmony_ci		gp = kvm->arch.nested_guests[i];
6858c2ecf20Sopenharmony_ci		if (!gp)
6868c2ecf20Sopenharmony_ci			continue;
6878c2ecf20Sopenharmony_ci		kvm->arch.nested_guests[i] = NULL;
6888c2ecf20Sopenharmony_ci		if (--gp->refcnt == 0) {
6898c2ecf20Sopenharmony_ci			gp->next = freelist;
6908c2ecf20Sopenharmony_ci			freelist = gp;
6918c2ecf20Sopenharmony_ci		}
6928c2ecf20Sopenharmony_ci	}
6938c2ecf20Sopenharmony_ci	kvm->arch.max_nested_lpid = -1;
6948c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
6958c2ecf20Sopenharmony_ci	while ((gp = freelist) != NULL) {
6968c2ecf20Sopenharmony_ci		freelist = gp->next;
6978c2ecf20Sopenharmony_ci		kvmhv_release_nested(gp);
6988c2ecf20Sopenharmony_ci	}
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	srcu_idx = srcu_read_lock(&kvm->srcu);
7018c2ecf20Sopenharmony_ci	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
7028c2ecf20Sopenharmony_ci		kvmhv_free_memslot_nest_rmap(memslot);
7038c2ecf20Sopenharmony_ci	srcu_read_unlock(&kvm->srcu, srcu_idx);
7048c2ecf20Sopenharmony_ci}
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci/* caller must hold gp->tlb_lock */
7078c2ecf20Sopenharmony_cistatic void kvmhv_flush_nested(struct kvm_nested_guest *gp)
7088c2ecf20Sopenharmony_ci{
7098c2ecf20Sopenharmony_ci	struct kvm *kvm = gp->l1_host;
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
7128c2ecf20Sopenharmony_ci	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
7138c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
7148c2ecf20Sopenharmony_ci	kvmhv_flush_lpid(gp->shadow_lpid);
7158c2ecf20Sopenharmony_ci	kvmhv_update_ptbl_cache(gp);
7168c2ecf20Sopenharmony_ci	if (gp->l1_gr_to_hr == 0)
7178c2ecf20Sopenharmony_ci		kvmhv_remove_nested(gp);
7188c2ecf20Sopenharmony_ci}
7198c2ecf20Sopenharmony_ci
7208c2ecf20Sopenharmony_cistruct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
7218c2ecf20Sopenharmony_ci					  bool create)
7228c2ecf20Sopenharmony_ci{
7238c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp, *newgp;
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_ci	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
7268c2ecf20Sopenharmony_ci	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
7278c2ecf20Sopenharmony_ci		return NULL;
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
7308c2ecf20Sopenharmony_ci	gp = kvm->arch.nested_guests[l1_lpid];
7318c2ecf20Sopenharmony_ci	if (gp)
7328c2ecf20Sopenharmony_ci		++gp->refcnt;
7338c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	if (gp || !create)
7368c2ecf20Sopenharmony_ci		return gp;
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
7398c2ecf20Sopenharmony_ci	if (!newgp)
7408c2ecf20Sopenharmony_ci		return NULL;
7418c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
7428c2ecf20Sopenharmony_ci	if (kvm->arch.nested_guests[l1_lpid]) {
7438c2ecf20Sopenharmony_ci		/* someone else beat us to it */
7448c2ecf20Sopenharmony_ci		gp = kvm->arch.nested_guests[l1_lpid];
7458c2ecf20Sopenharmony_ci	} else {
7468c2ecf20Sopenharmony_ci		kvm->arch.nested_guests[l1_lpid] = newgp;
7478c2ecf20Sopenharmony_ci		++newgp->refcnt;
7488c2ecf20Sopenharmony_ci		gp = newgp;
7498c2ecf20Sopenharmony_ci		newgp = NULL;
7508c2ecf20Sopenharmony_ci		if (l1_lpid > kvm->arch.max_nested_lpid)
7518c2ecf20Sopenharmony_ci			kvm->arch.max_nested_lpid = l1_lpid;
7528c2ecf20Sopenharmony_ci	}
7538c2ecf20Sopenharmony_ci	++gp->refcnt;
7548c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_ci	if (newgp)
7578c2ecf20Sopenharmony_ci		kvmhv_release_nested(newgp);
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	return gp;
7608c2ecf20Sopenharmony_ci}
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_civoid kvmhv_put_nested(struct kvm_nested_guest *gp)
7638c2ecf20Sopenharmony_ci{
7648c2ecf20Sopenharmony_ci	struct kvm *kvm = gp->l1_host;
7658c2ecf20Sopenharmony_ci	long ref;
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
7688c2ecf20Sopenharmony_ci	ref = --gp->refcnt;
7698c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
7708c2ecf20Sopenharmony_ci	if (ref == 0)
7718c2ecf20Sopenharmony_ci		kvmhv_release_nested(gp);
7728c2ecf20Sopenharmony_ci}
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_cistatic struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
7758c2ecf20Sopenharmony_ci{
7768c2ecf20Sopenharmony_ci	if (lpid > kvm->arch.max_nested_lpid)
7778c2ecf20Sopenharmony_ci		return NULL;
7788c2ecf20Sopenharmony_ci	return kvm->arch.nested_guests[lpid];
7798c2ecf20Sopenharmony_ci}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_cipte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
7828c2ecf20Sopenharmony_ci				 unsigned long ea, unsigned *hshift)
7838c2ecf20Sopenharmony_ci{
7848c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
7858c2ecf20Sopenharmony_ci	pte_t *pte;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	gp = kvmhv_find_nested(kvm, lpid);
7888c2ecf20Sopenharmony_ci	if (!gp)
7898c2ecf20Sopenharmony_ci		return NULL;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
7928c2ecf20Sopenharmony_ci		"%s called with kvm mmu_lock not held \n", __func__);
7938c2ecf20Sopenharmony_ci	pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	return pte;
7968c2ecf20Sopenharmony_ci}
7978c2ecf20Sopenharmony_ci
7988c2ecf20Sopenharmony_cistatic inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
7998c2ecf20Sopenharmony_ci{
8008c2ecf20Sopenharmony_ci	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
8018c2ecf20Sopenharmony_ci				       RMAP_NESTED_GPA_MASK));
8028c2ecf20Sopenharmony_ci}
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_civoid kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
8058c2ecf20Sopenharmony_ci			    struct rmap_nested **n_rmap)
8068c2ecf20Sopenharmony_ci{
8078c2ecf20Sopenharmony_ci	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
8088c2ecf20Sopenharmony_ci	struct rmap_nested *cursor;
8098c2ecf20Sopenharmony_ci	u64 rmap, new_rmap = (*n_rmap)->rmap;
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci	/* Are there any existing entries? */
8128c2ecf20Sopenharmony_ci	if (!(*rmapp)) {
8138c2ecf20Sopenharmony_ci		/* No -> use the rmap as a single entry */
8148c2ecf20Sopenharmony_ci		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
8158c2ecf20Sopenharmony_ci		return;
8168c2ecf20Sopenharmony_ci	}
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	/* Do any entries match what we're trying to insert? */
8198c2ecf20Sopenharmony_ci	for_each_nest_rmap_safe(cursor, entry, &rmap) {
8208c2ecf20Sopenharmony_ci		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
8218c2ecf20Sopenharmony_ci			return;
8228c2ecf20Sopenharmony_ci	}
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	/* Do we need to create a list or just add the new entry? */
8258c2ecf20Sopenharmony_ci	rmap = *rmapp;
8268c2ecf20Sopenharmony_ci	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
8278c2ecf20Sopenharmony_ci		*rmapp = 0UL;
8288c2ecf20Sopenharmony_ci	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
8298c2ecf20Sopenharmony_ci	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
8308c2ecf20Sopenharmony_ci		(*n_rmap)->list.next = (struct llist_node *) rmap;
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci	/* Set NULL so not freed by caller */
8338c2ecf20Sopenharmony_ci	*n_rmap = NULL;
8348c2ecf20Sopenharmony_ci}
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_cistatic void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
8378c2ecf20Sopenharmony_ci				      unsigned long clr, unsigned long set,
8388c2ecf20Sopenharmony_ci				      unsigned long hpa, unsigned long mask)
8398c2ecf20Sopenharmony_ci{
8408c2ecf20Sopenharmony_ci	unsigned long gpa;
8418c2ecf20Sopenharmony_ci	unsigned int shift, lpid;
8428c2ecf20Sopenharmony_ci	pte_t *ptep;
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
8458c2ecf20Sopenharmony_ci	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	/* Find the pte */
8488c2ecf20Sopenharmony_ci	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
8498c2ecf20Sopenharmony_ci	/*
8508c2ecf20Sopenharmony_ci	 * If the pte is present and the pfn is still the same, update the pte.
8518c2ecf20Sopenharmony_ci	 * If the pfn has changed then this is a stale rmap entry, the nested
8528c2ecf20Sopenharmony_ci	 * gpa actually points somewhere else now, and there is nothing to do.
8538c2ecf20Sopenharmony_ci	 * XXX A future optimisation would be to remove the rmap entry here.
8548c2ecf20Sopenharmony_ci	 */
8558c2ecf20Sopenharmony_ci	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
8568c2ecf20Sopenharmony_ci		__radix_pte_update(ptep, clr, set);
8578c2ecf20Sopenharmony_ci		kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
8588c2ecf20Sopenharmony_ci	}
8598c2ecf20Sopenharmony_ci}
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci/*
8628c2ecf20Sopenharmony_ci * For a given list of rmap entries, update the rc bits in all ptes in shadow
8638c2ecf20Sopenharmony_ci * page tables for nested guests which are referenced by the rmap list.
8648c2ecf20Sopenharmony_ci */
8658c2ecf20Sopenharmony_civoid kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
8668c2ecf20Sopenharmony_ci				    unsigned long clr, unsigned long set,
8678c2ecf20Sopenharmony_ci				    unsigned long hpa, unsigned long nbytes)
8688c2ecf20Sopenharmony_ci{
8698c2ecf20Sopenharmony_ci	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
8708c2ecf20Sopenharmony_ci	struct rmap_nested *cursor;
8718c2ecf20Sopenharmony_ci	unsigned long rmap, mask;
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
8748c2ecf20Sopenharmony_ci		return;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	mask = PTE_RPN_MASK & ~(nbytes - 1);
8778c2ecf20Sopenharmony_ci	hpa &= mask;
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	for_each_nest_rmap_safe(cursor, entry, &rmap)
8808c2ecf20Sopenharmony_ci		kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
8818c2ecf20Sopenharmony_ci}
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_cistatic void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
8848c2ecf20Sopenharmony_ci				   unsigned long hpa, unsigned long mask)
8858c2ecf20Sopenharmony_ci{
8868c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
8878c2ecf20Sopenharmony_ci	unsigned long gpa;
8888c2ecf20Sopenharmony_ci	unsigned int shift, lpid;
8898c2ecf20Sopenharmony_ci	pte_t *ptep;
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_ci	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
8928c2ecf20Sopenharmony_ci	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
8938c2ecf20Sopenharmony_ci	gp = kvmhv_find_nested(kvm, lpid);
8948c2ecf20Sopenharmony_ci	if (!gp)
8958c2ecf20Sopenharmony_ci		return;
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_ci	/* Find and invalidate the pte */
8988c2ecf20Sopenharmony_ci	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
8998c2ecf20Sopenharmony_ci	/* Don't spuriously invalidate ptes if the pfn has changed */
9008c2ecf20Sopenharmony_ci	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
9018c2ecf20Sopenharmony_ci		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
9028c2ecf20Sopenharmony_ci}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_cistatic void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
9058c2ecf20Sopenharmony_ci					unsigned long hpa, unsigned long mask)
9068c2ecf20Sopenharmony_ci{
9078c2ecf20Sopenharmony_ci	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
9088c2ecf20Sopenharmony_ci	struct rmap_nested *cursor;
9098c2ecf20Sopenharmony_ci	unsigned long rmap;
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	for_each_nest_rmap_safe(cursor, entry, &rmap) {
9128c2ecf20Sopenharmony_ci		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
9138c2ecf20Sopenharmony_ci		kfree(cursor);
9148c2ecf20Sopenharmony_ci	}
9158c2ecf20Sopenharmony_ci}
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci/* called with kvm->mmu_lock held */
9188c2ecf20Sopenharmony_civoid kvmhv_remove_nest_rmap_range(struct kvm *kvm,
9198c2ecf20Sopenharmony_ci				  const struct kvm_memory_slot *memslot,
9208c2ecf20Sopenharmony_ci				  unsigned long gpa, unsigned long hpa,
9218c2ecf20Sopenharmony_ci				  unsigned long nbytes)
9228c2ecf20Sopenharmony_ci{
9238c2ecf20Sopenharmony_ci	unsigned long gfn, end_gfn;
9248c2ecf20Sopenharmony_ci	unsigned long addr_mask;
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	if (!memslot)
9278c2ecf20Sopenharmony_ci		return;
9288c2ecf20Sopenharmony_ci	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
9298c2ecf20Sopenharmony_ci	end_gfn = gfn + (nbytes >> PAGE_SHIFT);
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
9328c2ecf20Sopenharmony_ci	hpa &= addr_mask;
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	for (; gfn < end_gfn; gfn++) {
9358c2ecf20Sopenharmony_ci		unsigned long *rmap = &memslot->arch.rmap[gfn];
9368c2ecf20Sopenharmony_ci		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
9378c2ecf20Sopenharmony_ci	}
9388c2ecf20Sopenharmony_ci}
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_cistatic void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
9418c2ecf20Sopenharmony_ci{
9428c2ecf20Sopenharmony_ci	unsigned long page;
9438c2ecf20Sopenharmony_ci
9448c2ecf20Sopenharmony_ci	for (page = 0; page < free->npages; page++) {
9458c2ecf20Sopenharmony_ci		unsigned long rmap, *rmapp = &free->arch.rmap[page];
9468c2ecf20Sopenharmony_ci		struct rmap_nested *cursor;
9478c2ecf20Sopenharmony_ci		struct llist_node *entry;
9488c2ecf20Sopenharmony_ci
9498c2ecf20Sopenharmony_ci		entry = llist_del_all((struct llist_head *) rmapp);
9508c2ecf20Sopenharmony_ci		for_each_nest_rmap_safe(cursor, entry, &rmap)
9518c2ecf20Sopenharmony_ci			kfree(cursor);
9528c2ecf20Sopenharmony_ci	}
9538c2ecf20Sopenharmony_ci}
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_cistatic bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
9568c2ecf20Sopenharmony_ci					struct kvm_nested_guest *gp,
9578c2ecf20Sopenharmony_ci					long gpa, int *shift_ret)
9588c2ecf20Sopenharmony_ci{
9598c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
9608c2ecf20Sopenharmony_ci	bool ret = false;
9618c2ecf20Sopenharmony_ci	pte_t *ptep;
9628c2ecf20Sopenharmony_ci	int shift;
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
9658c2ecf20Sopenharmony_ci	ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
9668c2ecf20Sopenharmony_ci	if (!shift)
9678c2ecf20Sopenharmony_ci		shift = PAGE_SHIFT;
9688c2ecf20Sopenharmony_ci	if (ptep && pte_present(*ptep)) {
9698c2ecf20Sopenharmony_ci		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
9708c2ecf20Sopenharmony_ci		ret = true;
9718c2ecf20Sopenharmony_ci	}
9728c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	if (shift_ret)
9758c2ecf20Sopenharmony_ci		*shift_ret = shift;
9768c2ecf20Sopenharmony_ci	return ret;
9778c2ecf20Sopenharmony_ci}
9788c2ecf20Sopenharmony_ci
/* Two-bit field at bits 18-19 of the tlbie instruction word ("ric"). */
static inline int get_ric(unsigned int instr)
{
	const unsigned int field = instr >> 18;

	return field & 0x3;
}
9838c2ecf20Sopenharmony_ci
/* Single bit 17 of the tlbie instruction word ("prs"). */
static inline int get_prs(unsigned int instr)
{
	const unsigned int field = instr >> 17;

	return field & 0x1;
}
9888c2ecf20Sopenharmony_ci
/* Single bit 16 of the tlbie instruction word ("r"). */
static inline int get_r(unsigned int instr)
{
	const unsigned int field = instr >> 16;

	return field & 0x1;
}
9938c2ecf20Sopenharmony_ci
/* Low 32 bits of the rS register value, used as the target lpid. */
static inline int get_lpid(unsigned long r_val)
{
	const unsigned long low_word = r_val & 0xffffffff;

	return low_word;
}
9988c2ecf20Sopenharmony_ci
/* Two-bit field at bits 10-11 of the rB register value ("is"). */
static inline int get_is(unsigned long r_val)
{
	const unsigned long field = r_val >> 10;

	return field & 0x3;
}
10038c2ecf20Sopenharmony_ci
/* Three-bit field at bits 5-7 of the rB register value ("ap"). */
static inline int get_ap(unsigned long r_val)
{
	const unsigned long field = r_val >> 5;

	return field & 0x7;
}
10088c2ecf20Sopenharmony_ci
/* Everything above the low 12 bits of the rB register value ("epn"). */
static inline long get_epn(unsigned long r_val)
{
	const unsigned long page_number = r_val >> 12;

	return page_number;
}
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_cistatic int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
10158c2ecf20Sopenharmony_ci					int ap, long epn)
10168c2ecf20Sopenharmony_ci{
10178c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
10188c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
10198c2ecf20Sopenharmony_ci	long npages;
10208c2ecf20Sopenharmony_ci	int shift, shadow_shift;
10218c2ecf20Sopenharmony_ci	unsigned long addr;
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci	shift = ap_to_shift(ap);
10248c2ecf20Sopenharmony_ci	addr = epn << 12;
10258c2ecf20Sopenharmony_ci	if (shift < 0)
10268c2ecf20Sopenharmony_ci		/* Invalid ap encoding */
10278c2ecf20Sopenharmony_ci		return -EINVAL;
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci	addr &= ~((1UL << shift) - 1);
10308c2ecf20Sopenharmony_ci	npages = 1UL << (shift - PAGE_SHIFT);
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci	gp = kvmhv_get_nested(kvm, lpid, false);
10338c2ecf20Sopenharmony_ci	if (!gp) /* No such guest -> nothing to do */
10348c2ecf20Sopenharmony_ci		return 0;
10358c2ecf20Sopenharmony_ci	mutex_lock(&gp->tlb_lock);
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci	/* There may be more than one host page backing this single guest pte */
10388c2ecf20Sopenharmony_ci	do {
10398c2ecf20Sopenharmony_ci		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ci		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
10428c2ecf20Sopenharmony_ci		addr += 1UL << shadow_shift;
10438c2ecf20Sopenharmony_ci	} while (npages > 0);
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_ci	mutex_unlock(&gp->tlb_lock);
10468c2ecf20Sopenharmony_ci	kvmhv_put_nested(gp);
10478c2ecf20Sopenharmony_ci	return 0;
10488c2ecf20Sopenharmony_ci}
10498c2ecf20Sopenharmony_ci
10508c2ecf20Sopenharmony_cistatic void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
10518c2ecf20Sopenharmony_ci				     struct kvm_nested_guest *gp, int ric)
10528c2ecf20Sopenharmony_ci{
10538c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	mutex_lock(&gp->tlb_lock);
10568c2ecf20Sopenharmony_ci	switch (ric) {
10578c2ecf20Sopenharmony_ci	case 0:
10588c2ecf20Sopenharmony_ci		/* Invalidate TLB */
10598c2ecf20Sopenharmony_ci		spin_lock(&kvm->mmu_lock);
10608c2ecf20Sopenharmony_ci		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
10618c2ecf20Sopenharmony_ci					  gp->shadow_lpid);
10628c2ecf20Sopenharmony_ci		kvmhv_flush_lpid(gp->shadow_lpid);
10638c2ecf20Sopenharmony_ci		spin_unlock(&kvm->mmu_lock);
10648c2ecf20Sopenharmony_ci		break;
10658c2ecf20Sopenharmony_ci	case 1:
10668c2ecf20Sopenharmony_ci		/*
10678c2ecf20Sopenharmony_ci		 * Invalidate PWC
10688c2ecf20Sopenharmony_ci		 * We don't cache this -> nothing to do
10698c2ecf20Sopenharmony_ci		 */
10708c2ecf20Sopenharmony_ci		break;
10718c2ecf20Sopenharmony_ci	case 2:
10728c2ecf20Sopenharmony_ci		/* Invalidate TLB, PWC and caching of partition table entries */
10738c2ecf20Sopenharmony_ci		kvmhv_flush_nested(gp);
10748c2ecf20Sopenharmony_ci		break;
10758c2ecf20Sopenharmony_ci	default:
10768c2ecf20Sopenharmony_ci		break;
10778c2ecf20Sopenharmony_ci	}
10788c2ecf20Sopenharmony_ci	mutex_unlock(&gp->tlb_lock);
10798c2ecf20Sopenharmony_ci}
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_cistatic void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
10828c2ecf20Sopenharmony_ci{
10838c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
10848c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
10858c2ecf20Sopenharmony_ci	int i;
10868c2ecf20Sopenharmony_ci
10878c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
10888c2ecf20Sopenharmony_ci	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
10898c2ecf20Sopenharmony_ci		gp = kvm->arch.nested_guests[i];
10908c2ecf20Sopenharmony_ci		if (gp) {
10918c2ecf20Sopenharmony_ci			spin_unlock(&kvm->mmu_lock);
10928c2ecf20Sopenharmony_ci			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
10938c2ecf20Sopenharmony_ci			spin_lock(&kvm->mmu_lock);
10948c2ecf20Sopenharmony_ci		}
10958c2ecf20Sopenharmony_ci	}
10968c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
10978c2ecf20Sopenharmony_ci}
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_cistatic int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
11008c2ecf20Sopenharmony_ci				    unsigned long rsval, unsigned long rbval)
11018c2ecf20Sopenharmony_ci{
11028c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
11038c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp;
11048c2ecf20Sopenharmony_ci	int r, ric, prs, is, ap;
11058c2ecf20Sopenharmony_ci	int lpid;
11068c2ecf20Sopenharmony_ci	long epn;
11078c2ecf20Sopenharmony_ci	int ret = 0;
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci	ric = get_ric(instr);
11108c2ecf20Sopenharmony_ci	prs = get_prs(instr);
11118c2ecf20Sopenharmony_ci	r = get_r(instr);
11128c2ecf20Sopenharmony_ci	lpid = get_lpid(rsval);
11138c2ecf20Sopenharmony_ci	is = get_is(rbval);
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci	/*
11168c2ecf20Sopenharmony_ci	 * These cases are invalid and are not handled:
11178c2ecf20Sopenharmony_ci	 * r   != 1 -> Only radix supported
11188c2ecf20Sopenharmony_ci	 * prs == 1 -> Not HV privileged
11198c2ecf20Sopenharmony_ci	 * ric == 3 -> No cluster bombs for radix
11208c2ecf20Sopenharmony_ci	 * is  == 1 -> Partition scoped translations not associated with pid
11218c2ecf20Sopenharmony_ci	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
11228c2ecf20Sopenharmony_ci	 */
11238c2ecf20Sopenharmony_ci	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
11248c2ecf20Sopenharmony_ci	    ((!is) && (ric == 1 || ric == 2)))
11258c2ecf20Sopenharmony_ci		return -EINVAL;
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_ci	switch (is) {
11288c2ecf20Sopenharmony_ci	case 0:
11298c2ecf20Sopenharmony_ci		/*
11308c2ecf20Sopenharmony_ci		 * We know ric == 0
11318c2ecf20Sopenharmony_ci		 * Invalidate TLB for a given target address
11328c2ecf20Sopenharmony_ci		 */
11338c2ecf20Sopenharmony_ci		epn = get_epn(rbval);
11348c2ecf20Sopenharmony_ci		ap = get_ap(rbval);
11358c2ecf20Sopenharmony_ci		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
11368c2ecf20Sopenharmony_ci		break;
11378c2ecf20Sopenharmony_ci	case 2:
11388c2ecf20Sopenharmony_ci		/* Invalidate matching LPID */
11398c2ecf20Sopenharmony_ci		gp = kvmhv_get_nested(kvm, lpid, false);
11408c2ecf20Sopenharmony_ci		if (gp) {
11418c2ecf20Sopenharmony_ci			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
11428c2ecf20Sopenharmony_ci			kvmhv_put_nested(gp);
11438c2ecf20Sopenharmony_ci		}
11448c2ecf20Sopenharmony_ci		break;
11458c2ecf20Sopenharmony_ci	case 3:
11468c2ecf20Sopenharmony_ci		/* Invalidate ALL LPIDs */
11478c2ecf20Sopenharmony_ci		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
11488c2ecf20Sopenharmony_ci		break;
11498c2ecf20Sopenharmony_ci	default:
11508c2ecf20Sopenharmony_ci		ret = -EINVAL;
11518c2ecf20Sopenharmony_ci		break;
11528c2ecf20Sopenharmony_ci	}
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ci	return ret;
11558c2ecf20Sopenharmony_ci}
11568c2ecf20Sopenharmony_ci
11578c2ecf20Sopenharmony_ci/*
11588c2ecf20Sopenharmony_ci * This handles the H_TLB_INVALIDATE hcall.
11598c2ecf20Sopenharmony_ci * Parameters are (r4) tlbie instruction code, (r5) rS contents,
11608c2ecf20Sopenharmony_ci * (r6) rB contents.
11618c2ecf20Sopenharmony_ci */
11628c2ecf20Sopenharmony_cilong kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
11638c2ecf20Sopenharmony_ci{
11648c2ecf20Sopenharmony_ci	int ret;
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
11678c2ecf20Sopenharmony_ci			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
11688c2ecf20Sopenharmony_ci	if (ret)
11698c2ecf20Sopenharmony_ci		return H_PARAMETER;
11708c2ecf20Sopenharmony_ci	return H_SUCCESS;
11718c2ecf20Sopenharmony_ci}
11728c2ecf20Sopenharmony_ci
11738c2ecf20Sopenharmony_ci/* Used to convert a nested guest real address to a L1 guest real address */
11748c2ecf20Sopenharmony_cistatic int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
11758c2ecf20Sopenharmony_ci				       struct kvm_nested_guest *gp,
11768c2ecf20Sopenharmony_ci				       unsigned long n_gpa, unsigned long dsisr,
11778c2ecf20Sopenharmony_ci				       struct kvmppc_pte *gpte_p)
11788c2ecf20Sopenharmony_ci{
11798c2ecf20Sopenharmony_ci	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
11808c2ecf20Sopenharmony_ci	int ret;
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_ci	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
11838c2ecf20Sopenharmony_ci					 &fault_addr);
11848c2ecf20Sopenharmony_ci
11858c2ecf20Sopenharmony_ci	if (ret) {
11868c2ecf20Sopenharmony_ci		/* We didn't find a pte */
11878c2ecf20Sopenharmony_ci		if (ret == -EINVAL) {
11888c2ecf20Sopenharmony_ci			/* Unsupported mmu config */
11898c2ecf20Sopenharmony_ci			flags |= DSISR_UNSUPP_MMU;
11908c2ecf20Sopenharmony_ci		} else if (ret == -ENOENT) {
11918c2ecf20Sopenharmony_ci			/* No translation found */
11928c2ecf20Sopenharmony_ci			flags |= DSISR_NOHPTE;
11938c2ecf20Sopenharmony_ci		} else if (ret == -EFAULT) {
11948c2ecf20Sopenharmony_ci			/* Couldn't access L1 real address */
11958c2ecf20Sopenharmony_ci			flags |= DSISR_PRTABLE_FAULT;
11968c2ecf20Sopenharmony_ci			vcpu->arch.fault_gpa = fault_addr;
11978c2ecf20Sopenharmony_ci		} else {
11988c2ecf20Sopenharmony_ci			/* Unknown error */
11998c2ecf20Sopenharmony_ci			return ret;
12008c2ecf20Sopenharmony_ci		}
12018c2ecf20Sopenharmony_ci		goto forward_to_l1;
12028c2ecf20Sopenharmony_ci	} else {
12038c2ecf20Sopenharmony_ci		/* We found a pte -> check permissions */
12048c2ecf20Sopenharmony_ci		if (dsisr & DSISR_ISSTORE) {
12058c2ecf20Sopenharmony_ci			/* Can we write? */
12068c2ecf20Sopenharmony_ci			if (!gpte_p->may_write) {
12078c2ecf20Sopenharmony_ci				flags |= DSISR_PROTFAULT;
12088c2ecf20Sopenharmony_ci				goto forward_to_l1;
12098c2ecf20Sopenharmony_ci			}
12108c2ecf20Sopenharmony_ci		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
12118c2ecf20Sopenharmony_ci			/* Can we execute? */
12128c2ecf20Sopenharmony_ci			if (!gpte_p->may_execute) {
12138c2ecf20Sopenharmony_ci				flags |= SRR1_ISI_N_G_OR_CIP;
12148c2ecf20Sopenharmony_ci				goto forward_to_l1;
12158c2ecf20Sopenharmony_ci			}
12168c2ecf20Sopenharmony_ci		} else {
12178c2ecf20Sopenharmony_ci			/* Can we read? */
12188c2ecf20Sopenharmony_ci			if (!gpte_p->may_read && !gpte_p->may_write) {
12198c2ecf20Sopenharmony_ci				flags |= DSISR_PROTFAULT;
12208c2ecf20Sopenharmony_ci				goto forward_to_l1;
12218c2ecf20Sopenharmony_ci			}
12228c2ecf20Sopenharmony_ci		}
12238c2ecf20Sopenharmony_ci	}
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci	return 0;
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ciforward_to_l1:
12288c2ecf20Sopenharmony_ci	vcpu->arch.fault_dsisr = flags;
12298c2ecf20Sopenharmony_ci	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
12308c2ecf20Sopenharmony_ci		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
12318c2ecf20Sopenharmony_ci		vcpu->arch.shregs.msr |= flags;
12328c2ecf20Sopenharmony_ci	}
12338c2ecf20Sopenharmony_ci	return RESUME_HOST;
12348c2ecf20Sopenharmony_ci}
12358c2ecf20Sopenharmony_ci
12368c2ecf20Sopenharmony_cistatic long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
12378c2ecf20Sopenharmony_ci				       struct kvm_nested_guest *gp,
12388c2ecf20Sopenharmony_ci				       unsigned long n_gpa,
12398c2ecf20Sopenharmony_ci				       struct kvmppc_pte gpte,
12408c2ecf20Sopenharmony_ci				       unsigned long dsisr)
12418c2ecf20Sopenharmony_ci{
12428c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
12438c2ecf20Sopenharmony_ci	bool writing = !!(dsisr & DSISR_ISSTORE);
12448c2ecf20Sopenharmony_ci	u64 pgflags;
12458c2ecf20Sopenharmony_ci	long ret;
12468c2ecf20Sopenharmony_ci
12478c2ecf20Sopenharmony_ci	/* Are the rc bits set in the L1 partition scoped pte? */
12488c2ecf20Sopenharmony_ci	pgflags = _PAGE_ACCESSED;
12498c2ecf20Sopenharmony_ci	if (writing)
12508c2ecf20Sopenharmony_ci		pgflags |= _PAGE_DIRTY;
12518c2ecf20Sopenharmony_ci	if (pgflags & ~gpte.rc)
12528c2ecf20Sopenharmony_ci		return RESUME_HOST;
12538c2ecf20Sopenharmony_ci
12548c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
12558c2ecf20Sopenharmony_ci	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
12568c2ecf20Sopenharmony_ci	ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
12578c2ecf20Sopenharmony_ci				      gpte.raddr, kvm->arch.lpid);
12588c2ecf20Sopenharmony_ci	if (!ret) {
12598c2ecf20Sopenharmony_ci		ret = -EINVAL;
12608c2ecf20Sopenharmony_ci		goto out_unlock;
12618c2ecf20Sopenharmony_ci	}
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
12648c2ecf20Sopenharmony_ci	ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
12658c2ecf20Sopenharmony_ci				      n_gpa, gp->l1_lpid);
12668c2ecf20Sopenharmony_ci	if (!ret)
12678c2ecf20Sopenharmony_ci		ret = -EINVAL;
12688c2ecf20Sopenharmony_ci	else
12698c2ecf20Sopenharmony_ci		ret = 0;
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ciout_unlock:
12728c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
12738c2ecf20Sopenharmony_ci	return ret;
12748c2ecf20Sopenharmony_ci}
12758c2ecf20Sopenharmony_ci
12768c2ecf20Sopenharmony_cistatic inline int kvmppc_radix_level_to_shift(int level)
12778c2ecf20Sopenharmony_ci{
12788c2ecf20Sopenharmony_ci	switch (level) {
12798c2ecf20Sopenharmony_ci	case 2:
12808c2ecf20Sopenharmony_ci		return PUD_SHIFT;
12818c2ecf20Sopenharmony_ci	case 1:
12828c2ecf20Sopenharmony_ci		return PMD_SHIFT;
12838c2ecf20Sopenharmony_ci	default:
12848c2ecf20Sopenharmony_ci		return PAGE_SHIFT;
12858c2ecf20Sopenharmony_ci	}
12868c2ecf20Sopenharmony_ci}
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_cistatic inline int kvmppc_radix_shift_to_level(int shift)
12898c2ecf20Sopenharmony_ci{
12908c2ecf20Sopenharmony_ci	if (shift == PUD_SHIFT)
12918c2ecf20Sopenharmony_ci		return 2;
12928c2ecf20Sopenharmony_ci	if (shift == PMD_SHIFT)
12938c2ecf20Sopenharmony_ci		return 1;
12948c2ecf20Sopenharmony_ci	if (shift == PAGE_SHIFT)
12958c2ecf20Sopenharmony_ci		return 0;
12968c2ecf20Sopenharmony_ci	WARN_ON_ONCE(1);
12978c2ecf20Sopenharmony_ci	return 0;
12988c2ecf20Sopenharmony_ci}
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci/* called with gp->tlb_lock held */
13018c2ecf20Sopenharmony_cistatic long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
13028c2ecf20Sopenharmony_ci					  struct kvm_nested_guest *gp)
13038c2ecf20Sopenharmony_ci{
13048c2ecf20Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
13058c2ecf20Sopenharmony_ci	struct kvm_memory_slot *memslot;
13068c2ecf20Sopenharmony_ci	struct rmap_nested *n_rmap;
13078c2ecf20Sopenharmony_ci	struct kvmppc_pte gpte;
13088c2ecf20Sopenharmony_ci	pte_t pte, *pte_p;
13098c2ecf20Sopenharmony_ci	unsigned long mmu_seq;
13108c2ecf20Sopenharmony_ci	unsigned long dsisr = vcpu->arch.fault_dsisr;
13118c2ecf20Sopenharmony_ci	unsigned long ea = vcpu->arch.fault_dar;
13128c2ecf20Sopenharmony_ci	unsigned long *rmapp;
13138c2ecf20Sopenharmony_ci	unsigned long n_gpa, gpa, gfn, perm = 0UL;
13148c2ecf20Sopenharmony_ci	unsigned int shift, l1_shift, level;
13158c2ecf20Sopenharmony_ci	bool writing = !!(dsisr & DSISR_ISSTORE);
13168c2ecf20Sopenharmony_ci	bool kvm_ro = false;
13178c2ecf20Sopenharmony_ci	long int ret;
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_ci	if (!gp->l1_gr_to_hr) {
13208c2ecf20Sopenharmony_ci		kvmhv_update_ptbl_cache(gp);
13218c2ecf20Sopenharmony_ci		if (!gp->l1_gr_to_hr)
13228c2ecf20Sopenharmony_ci			return RESUME_HOST;
13238c2ecf20Sopenharmony_ci	}
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci	/* Convert the nested guest real address into a L1 guest real address */
13268c2ecf20Sopenharmony_ci
13278c2ecf20Sopenharmony_ci	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
13288c2ecf20Sopenharmony_ci	if (!(dsisr & DSISR_PRTABLE_FAULT))
13298c2ecf20Sopenharmony_ci		n_gpa |= ea & 0xFFF;
13308c2ecf20Sopenharmony_ci	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
13318c2ecf20Sopenharmony_ci
13328c2ecf20Sopenharmony_ci	/*
13338c2ecf20Sopenharmony_ci	 * If the hardware found a translation but we don't now have a usable
13348c2ecf20Sopenharmony_ci	 * translation in the l1 partition-scoped tree, remove the shadow pte
13358c2ecf20Sopenharmony_ci	 * and let the guest retry.
13368c2ecf20Sopenharmony_ci	 */
13378c2ecf20Sopenharmony_ci	if (ret == RESUME_HOST &&
13388c2ecf20Sopenharmony_ci	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
13398c2ecf20Sopenharmony_ci		      DSISR_BAD_COPYPASTE)))
13408c2ecf20Sopenharmony_ci		goto inval;
13418c2ecf20Sopenharmony_ci	if (ret)
13428c2ecf20Sopenharmony_ci		return ret;
13438c2ecf20Sopenharmony_ci
13448c2ecf20Sopenharmony_ci	/* Failed to set the reference/change bits */
13458c2ecf20Sopenharmony_ci	if (dsisr & DSISR_SET_RC) {
13468c2ecf20Sopenharmony_ci		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
13478c2ecf20Sopenharmony_ci		if (ret == RESUME_HOST)
13488c2ecf20Sopenharmony_ci			return ret;
13498c2ecf20Sopenharmony_ci		if (ret)
13508c2ecf20Sopenharmony_ci			goto inval;
13518c2ecf20Sopenharmony_ci		dsisr &= ~DSISR_SET_RC;
13528c2ecf20Sopenharmony_ci		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
13538c2ecf20Sopenharmony_ci			       DSISR_PROTFAULT)))
13548c2ecf20Sopenharmony_ci			return RESUME_GUEST;
13558c2ecf20Sopenharmony_ci	}
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_ci	/*
13588c2ecf20Sopenharmony_ci	 * We took an HISI or HDSI while we were running a nested guest which
13598c2ecf20Sopenharmony_ci	 * means we have no partition scoped translation for that. This means
13608c2ecf20Sopenharmony_ci	 * we need to insert a pte for the mapping into our shadow_pgtable.
13618c2ecf20Sopenharmony_ci	 */
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_ci	l1_shift = gpte.page_shift;
13648c2ecf20Sopenharmony_ci	if (l1_shift < PAGE_SHIFT) {
13658c2ecf20Sopenharmony_ci		/* We don't support l1 using a page size smaller than our own */
13668c2ecf20Sopenharmony_ci		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
13678c2ecf20Sopenharmony_ci			l1_shift, PAGE_SHIFT);
13688c2ecf20Sopenharmony_ci		return -EINVAL;
13698c2ecf20Sopenharmony_ci	}
13708c2ecf20Sopenharmony_ci	gpa = gpte.raddr;
13718c2ecf20Sopenharmony_ci	gfn = gpa >> PAGE_SHIFT;
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci	/* 1. Get the corresponding host memslot */
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	memslot = gfn_to_memslot(kvm, gfn);
13768c2ecf20Sopenharmony_ci	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
13778c2ecf20Sopenharmony_ci		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
13788c2ecf20Sopenharmony_ci			/* unusual error -> reflect to the guest as a DSI */
13798c2ecf20Sopenharmony_ci			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
13808c2ecf20Sopenharmony_ci			return RESUME_GUEST;
13818c2ecf20Sopenharmony_ci		}
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_ci		/* passthrough of emulated MMIO case */
13848c2ecf20Sopenharmony_ci		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
13858c2ecf20Sopenharmony_ci	}
13868c2ecf20Sopenharmony_ci	if (memslot->flags & KVM_MEM_READONLY) {
13878c2ecf20Sopenharmony_ci		if (writing) {
13888c2ecf20Sopenharmony_ci			/* Give the guest a DSI */
13898c2ecf20Sopenharmony_ci			kvmppc_core_queue_data_storage(vcpu, ea,
13908c2ecf20Sopenharmony_ci					DSISR_ISSTORE | DSISR_PROTFAULT);
13918c2ecf20Sopenharmony_ci			return RESUME_GUEST;
13928c2ecf20Sopenharmony_ci		}
13938c2ecf20Sopenharmony_ci		kvm_ro = true;
13948c2ecf20Sopenharmony_ci	}
13958c2ecf20Sopenharmony_ci
13968c2ecf20Sopenharmony_ci	/* 2. Find the host pte for this L1 guest real address */
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci	/* Used to check for invalidations in progress */
13998c2ecf20Sopenharmony_ci	mmu_seq = kvm->mmu_notifier_seq;
14008c2ecf20Sopenharmony_ci	smp_rmb();
14018c2ecf20Sopenharmony_ci
14028c2ecf20Sopenharmony_ci	/* See if can find translation in our partition scoped tables for L1 */
14038c2ecf20Sopenharmony_ci	pte = __pte(0);
14048c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
14058c2ecf20Sopenharmony_ci	pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
14068c2ecf20Sopenharmony_ci	if (!shift)
14078c2ecf20Sopenharmony_ci		shift = PAGE_SHIFT;
14088c2ecf20Sopenharmony_ci	if (pte_p)
14098c2ecf20Sopenharmony_ci		pte = *pte_p;
14108c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
14138c2ecf20Sopenharmony_ci		/* No suitable pte found -> try to insert a mapping */
14148c2ecf20Sopenharmony_ci		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
14158c2ecf20Sopenharmony_ci					writing, kvm_ro, &pte, &level);
14168c2ecf20Sopenharmony_ci		if (ret == -EAGAIN)
14178c2ecf20Sopenharmony_ci			return RESUME_GUEST;
14188c2ecf20Sopenharmony_ci		else if (ret)
14198c2ecf20Sopenharmony_ci			return ret;
14208c2ecf20Sopenharmony_ci		shift = kvmppc_radix_level_to_shift(level);
14218c2ecf20Sopenharmony_ci	}
14228c2ecf20Sopenharmony_ci	/* Align gfn to the start of the page */
14238c2ecf20Sopenharmony_ci	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci	/* The permissions is the combination of the host and l1 guest ptes */
14288c2ecf20Sopenharmony_ci	perm |= gpte.may_read ? 0UL : _PAGE_READ;
14298c2ecf20Sopenharmony_ci	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
14308c2ecf20Sopenharmony_ci	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
14318c2ecf20Sopenharmony_ci	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
14328c2ecf20Sopenharmony_ci	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
14338c2ecf20Sopenharmony_ci	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
14348c2ecf20Sopenharmony_ci	pte = __pte(pte_val(pte) & ~perm);
14358c2ecf20Sopenharmony_ci
14368c2ecf20Sopenharmony_ci	/* What size pte can we insert? */
14378c2ecf20Sopenharmony_ci	if (shift > l1_shift) {
14388c2ecf20Sopenharmony_ci		u64 mask;
14398c2ecf20Sopenharmony_ci		unsigned int actual_shift = PAGE_SHIFT;
14408c2ecf20Sopenharmony_ci		if (PMD_SHIFT < l1_shift)
14418c2ecf20Sopenharmony_ci			actual_shift = PMD_SHIFT;
14428c2ecf20Sopenharmony_ci		mask = (1UL << shift) - (1UL << actual_shift);
14438c2ecf20Sopenharmony_ci		pte = __pte(pte_val(pte) | (gpa & mask));
14448c2ecf20Sopenharmony_ci		shift = actual_shift;
14458c2ecf20Sopenharmony_ci	}
14468c2ecf20Sopenharmony_ci	level = kvmppc_radix_shift_to_level(shift);
14478c2ecf20Sopenharmony_ci	n_gpa &= ~((1UL << shift) - 1);
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_ci	/* 4. Insert the pte into our shadow_pgtable */
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_ci	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
14528c2ecf20Sopenharmony_ci	if (!n_rmap)
14538c2ecf20Sopenharmony_ci		return RESUME_GUEST; /* Let the guest try again */
14548c2ecf20Sopenharmony_ci	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
14558c2ecf20Sopenharmony_ci		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
14568c2ecf20Sopenharmony_ci	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
14578c2ecf20Sopenharmony_ci	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
14588c2ecf20Sopenharmony_ci				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
14598c2ecf20Sopenharmony_ci	kfree(n_rmap);
14608c2ecf20Sopenharmony_ci	if (ret == -EAGAIN)
14618c2ecf20Sopenharmony_ci		ret = RESUME_GUEST;	/* Let the guest try again */
14628c2ecf20Sopenharmony_ci
14638c2ecf20Sopenharmony_ci	return ret;
14648c2ecf20Sopenharmony_ci
14658c2ecf20Sopenharmony_ci inval:
14668c2ecf20Sopenharmony_ci	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
14678c2ecf20Sopenharmony_ci	return RESUME_GUEST;
14688c2ecf20Sopenharmony_ci}
14698c2ecf20Sopenharmony_ci
14708c2ecf20Sopenharmony_cilong int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
14718c2ecf20Sopenharmony_ci{
14728c2ecf20Sopenharmony_ci	struct kvm_nested_guest *gp = vcpu->arch.nested;
14738c2ecf20Sopenharmony_ci	long int ret;
14748c2ecf20Sopenharmony_ci
14758c2ecf20Sopenharmony_ci	mutex_lock(&gp->tlb_lock);
14768c2ecf20Sopenharmony_ci	ret = __kvmhv_nested_page_fault(vcpu, gp);
14778c2ecf20Sopenharmony_ci	mutex_unlock(&gp->tlb_lock);
14788c2ecf20Sopenharmony_ci	return ret;
14798c2ecf20Sopenharmony_ci}
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ciint kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
14828c2ecf20Sopenharmony_ci{
14838c2ecf20Sopenharmony_ci	int ret = -1;
14848c2ecf20Sopenharmony_ci
14858c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
14868c2ecf20Sopenharmony_ci	while (++lpid <= kvm->arch.max_nested_lpid) {
14878c2ecf20Sopenharmony_ci		if (kvm->arch.nested_guests[lpid]) {
14888c2ecf20Sopenharmony_ci			ret = lpid;
14898c2ecf20Sopenharmony_ci			break;
14908c2ecf20Sopenharmony_ci		}
14918c2ecf20Sopenharmony_ci	}
14928c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
14938c2ecf20Sopenharmony_ci	return ret;
14948c2ecf20Sopenharmony_ci}
1495