// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/page-flags.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include "kvm_compat.h"

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached.
 */
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
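/*
 * In other words: a walk below the PGD may need to allocate at most one PTE
 * table when the PMD level is folded, or a PMD table plus a PTE table
 * otherwise, so that many pages must be available in the MMU page cache.
 */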

static inline int kvm_pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; }
static inline pte_t kvm_pte_mksmall(pte_t pte)
{
	pte_val(pte) &= ~_PAGE_HUGE;
	return pte;
}
static inline void kvm_set_pte(pte_t *ptep, pte_t val)
{
	WRITE_ONCE(*ptep, val);
}

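/*
 * Invalidate the guest TLB entry covering @gpa. The address is rounded down
 * with (PAGE_MASK << 1) since a TLB entry maps an even/odd pair of pages,
 * and the invtlb operation is scoped to this VM's guest ID (GID) taken from
 * the GSTAT CSR.
 */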
static int kvm_tlb_flush_gpa(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	preempt_disable();
	gpa &= (PAGE_MASK << 1);
	invtlb(INVTLB_GID_ADDR, kvm_read_csr_gstat() & KVM_GSTAT_GID, gpa);
	preempt_enable();
	return 0;
}

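/*
 * pmd_mkhuge()/pmd_mkclean()/pmd_mkold() are only provided by the core MM
 * when CONFIG_TRANSPARENT_HUGEPAGE is enabled; with CONFIG_HUGETLB_PAGE
 * alone, the wrappers below apply the equivalent pte_mk*() helper to the
 * PMD bits instead.
 */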
static inline pmd_t kvm_pmd_mkhuge(pmd_t pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_mkhuge(pmd);
#elif defined(CONFIG_HUGETLB_PAGE)
	pte_t entry;

	pte_val(entry) = pmd_val(pmd);
	entry = pte_mkhuge(entry);
	pmd_val(pmd) = pte_val(entry);
#endif
	return pmd;
}

static inline pmd_t kvm_pmd_mkclean(pmd_t pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_mkclean(pmd);
#elif defined(CONFIG_HUGETLB_PAGE)
	pte_t entry;

	pte_val(entry) = pmd_val(pmd);
	entry = pte_mkclean(entry);
	pmd_val(pmd) = pte_val(entry);
#endif
	return pmd;
}

static inline pmd_t kvm_pmd_mkold(pmd_t pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_mkold(pmd);
#elif defined(CONFIG_HUGETLB_PAGE)
	pte_t entry;

	pte_val(entry) = pmd_val(pmd);
	entry = pte_mkold(entry);
	pmd_val(pmd) = pte_val(entry);
#endif
	return pmd;
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
 * to host physical page mappings.
 *
 * Returns:	Pointer to new KVM GPA page directory.
 *		NULL on allocation failure.
 */
pgd_t *kvm_pgd_alloc(void)
{
	pgd_t *ret;
	struct page *page;

	page = alloc_pages(GFP_KERNEL, 0);
	if (!page)
		return NULL;
	ret = (pgd_t *) page_address(page);
	if (ret)
		pgd_init((unsigned long)ret);

	return ret;
}

/**
 * kvm_walk_pgd() - Walk page table with optional allocation.
 * @pgd:	Page directory pointer.
 * @addr:	Address to index page table using.
 * @cache:	MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns:	Pointer to pte_t corresponding to @addr.
 *		NULL if a page table doesn't exist for @addr and !@cache.
 *		NULL if a page table allocation failed.
 */
static pte_t *kvm_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd += pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used yet */
		BUG();
		return NULL;
	}
	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		pmd_t *new_pmd;

		if (!cache)
			return NULL;
		new_pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)new_pmd,
			 (unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, new_pmd);
	}
	pmd = pmd_offset(pud, addr);
#ifdef CONFIG_HUGETLB_PAGE
	if (pmd_huge(*pmd))
		return (pte_t *)pmd;
#endif
	if (pmd_none(*pmd)) {
		pte_t *new_pte;

		if (!cache)
			return NULL;
		new_pte = kvm_mmu_memory_cache_alloc(cache);
		clear_page(new_pte);
		pmd_populate_kernel(NULL, pmd, new_pte);
	}
	return pte_offset_kernel(pmd, addr);
}
/* Caller must hold kvm->mmu_lock */
static pte_t *kvm_pte_for_gpa(struct kvm *kvm,
				struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	return kvm_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}

/*
 * kvm_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page tables.
 */
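/*
 * Each level handler returns true when its entire table range was covered,
 * telling the caller that the table page itself may be freed.
 */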

static bool kvm_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
				   unsigned long end_gpa, unsigned long *data)
{
	int i_min = pte_index(start_gpa);
	int i_max = pte_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
		if (data)
			*data = *data + 1;
	}
	return safe_to_remove;
}

static bool kvm_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
				   unsigned long end_gpa, unsigned long *data)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gpa);
	int i_max = pmd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		if (pmd_huge(pmd[i])) {
			pmd_clear(pmd + i);
			if (data)
				*data += PTRS_PER_PMD;
			continue;
		}

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_flush_gpa_pte(pte, start_gpa, end, data)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
				   unsigned long end_gpa, unsigned long *data)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gpa);
	int i_max = pud_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_flush_gpa_pmd(pmd, start_gpa, end, data)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				unsigned long end_gpa, unsigned long *data)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_flush_gpa_pud(pud, start_gpa, end, data)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

/**
 * kvm_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 * @data:	Optional counter of PTEs cleared, or NULL.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether it's safe to remove the top level page directory because
 *		all lower levels have been removed.
 */
static bool kvm_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn, void *data)
{
	return kvm_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
				start_gfn << PAGE_SHIFT,
				end_gfn << PAGE_SHIFT, (unsigned long *)data);
}

/*
 * kvm_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the VM's
 * GPA page table to allow dirty page tracking.
 */

static int kvm_mkclean_pte(pte_t *pte, unsigned long start, unsigned long end)
{
	int ret = 0;
	int i_min = pte_index(start);
	int i_max = pte_index(end);
	int i;
	pte_t val;

	for (i = i_min; i <= i_max; ++i) {
		val = pte[i];
		if (pte_present(val) && pte_dirty(val)) {
			set_pte(pte + i, pte_mkclean(val));
			ret = 1;
		}
	}
	return ret;
}

static int kvm_mkclean_pmd(pmd_t *pmd, unsigned long start, unsigned long end)
{
	int ret = 0;
	pte_t *pte;
	unsigned long cur_end = ~0ul;
	int i_min = pmd_index(start);
	int i_max = pmd_index(end);
	int i;
	pmd_t old, new;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		if (pmd_huge(pmd[i])) {
			old = pmd[i];
			new = kvm_pmd_mkclean(old);
			if (pmd_val(new) == pmd_val(old))
				continue;
			set_pmd(pmd + i, new);
			ret = 1;
			continue;
		}

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkclean_pte(pte, start, cur_end);
	}

	return ret;
}

static int kvm_mkclean_pud(pud_t *pud, unsigned long start, unsigned long end)
{
	int ret = 0;
	pmd_t *pmd;
	unsigned long cur_end = ~0ul;
	int i_min = pud_index(start);
	int i_max = pud_index(end);
	int i;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkclean_pmd(pmd, start, cur_end);
	}
	return ret;
}

static int kvm_mkclean_pgd(pgd_t *pgd, unsigned long start, unsigned long end)
{
	int ret = 0;
	p4d_t *p4d;
	pud_t *pud;
	unsigned long cur_end = ~0ul;
	int i_min = pgd_index(start);
	int i_max = pgd_index(end);
	int i;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkclean_pud(pud, start, cur_end);
	}
	return ret;
}

/**
 * kvm_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether any GPA mappings were modified, which would require
 *		derived mappings (GVA page tables & TLB entries) to be
 *		invalidated.
 */
static int kvm_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mkclean_pgd(kvm->arch.gpa_mm.pgd, start_gfn << PAGE_SHIFT,
				end_gfn << PAGE_SHIFT);
}

/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write-protects the associated PTEs. The
 * caller must acquire @kvm->mmu_lock.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	gfn_t base_gfn = slot->base_gfn + gfn_offset;
	gfn_t start = base_gfn + __ffs(mask);
	gfn_t end = base_gfn + __fls(mask);

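	/*
	 * @mask covers at most BITS_PER_LONG pages, so cleaning the span
	 * [__ffs(mask), __fls(mask)] write-protects every dirty gfn in it.
	 */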
	kvm_mkclean_gpa_pt(kvm, start, end);
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int needs_flush;

	kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, UVA: %llx\n",
		  __func__, kvm, mem->slot, mem->guest_phys_addr,
		  mem->memory_size, mem->userspace_addr);

	/*
	 * If dirty page logging is enabled, write protect all pages in the slot
	 * ready for dirty logging.
	 *
	 * There is no need to do this in any of the following cases:
	 * CREATE:	No dirty mappings will already exist.
	 * MOVE/DELETE:	The old mappings will already have been cleaned up by
	 *		kvm_arch_flush_shadow_memslot()
	 */
	if (change == KVM_MR_FLAGS_ONLY &&
	    (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
	     new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
		spin_lock(&kvm->mmu_lock);
		/* Write protect GPA page table entries */
		needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn,
					new->base_gfn + new->npages - 1);
		/* Let implementation do the rest */
		if (needs_flush)
			kvm_flush_remote_tlbs(kvm);
		spin_unlock(&kvm->mmu_lock);
	}
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	/* Flush whole GPA */
	kvm_flush_gpa_pt(kvm, 0, ~0UL, NULL);

	/* Flush vpid for each VCPU individually */
	kvm_flush_remote_tlbs(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
		struct kvm_memory_slot *slot)
{
	unsigned long npages;

	/*
	 * The slot has been made invalid (ready for moving or deletion), so we
	 * need to ensure that it can no longer be accessed by any guest VCPUs.
	 */

	npages = 0;
	spin_lock(&kvm->mmu_lock);
	/* Flush slot from GPA */
	kvm_flush_gpa_pt(kvm, slot->base_gfn,
			slot->base_gfn + slot->npages - 1, &npages);
	/* Let implementation do the rest */
	if (npages)
		kvm_flush_remote_tlbs(kvm);
	spin_unlock(&kvm->mmu_lock);
}

void _kvm_destroy_mm(struct kvm *kvm)
{
	/* It should always be safe to remove after flushing the whole range */
	WARN_ON(!kvm_flush_gpa_pt(kvm, 0, ~0UL, NULL));
	pgd_free(NULL, kvm->arch.gpa_mm.pgd);
	kvm->arch.gpa_mm.pgd = NULL;
}

/*
 * Mark a range of guest physical address space old (all accesses fault) in the
 * VM's GPA page table to allow detection of commonly used pages.
 */

static int kvm_mkold_pte(pte_t *pte, unsigned long start,
				 unsigned long end)
{
	int ret = 0;
	int i_min = pte_index(start);
	int i_max = pte_index(end);
	int i;
	pte_t old, new;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		old = pte[i];
		new = pte_mkold(old);
		if (pte_val(new) == pte_val(old))
			continue;
		set_pte(pte + i, new);
		ret = 1;
	}

	return ret;
}

static int kvm_mkold_pmd(pmd_t *pmd, unsigned long start, unsigned long end)
{
	int ret = 0;
	pte_t *pte;
	unsigned long cur_end = ~0ul;
	int i_min = pmd_index(start);
	int i_max = pmd_index(end);
	int i;
	pmd_t old, new;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		if (pmd_huge(pmd[i])) {
			old = pmd[i];
			new = kvm_pmd_mkold(old);
			if (pmd_val(new) == pmd_val(old))
				continue;
			set_pmd(pmd + i, new);
			ret = 1;
			continue;
		}

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkold_pte(pte, start, cur_end);
	}

	return ret;
}

static int kvm_mkold_pud(pud_t *pud, unsigned long start, unsigned long end)
{
	int ret = 0;
	pmd_t *pmd;
	unsigned long cur_end = ~0ul;
	int i_min = pud_index(start);
	int i_max = pud_index(end);
	int i;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkold_pmd(pmd, start, cur_end);
	}

	return ret;
}

static int kvm_mkold_pgd(pgd_t *pgd, unsigned long start, unsigned long end)
{
	int ret = 0;
	p4d_t *p4d;
	pud_t *pud;
	unsigned long cur_end = ~0ul;
	int i_min = pgd_index(start);
	int i_max = pgd_index(end);
	int i;

	for (i = i_min; i <= i_max; ++i, start = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			cur_end = end;

		ret |= kvm_mkold_pud(pud, start, cur_end);
	}

	return ret;
}

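/*
 * Apply @handler to each gfn range of @kvm's memslots that intersects the
 * hva range [@start, @end). The handlers' return values are OR-ed together.
 */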
static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm, gfn_t gfn,
					    gpa_t gfn_end,
					    struct kvm_memory_slot *memslot,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
		ret |= handler(kvm, gfn, gfn_end, memslot, data);
	}

	return ret;
}


static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				 struct kvm_memory_slot *memslot, void *data)
{
	unsigned long npages;

	npages = 0;
	kvm_flush_gpa_pt(kvm, gfn, gfn_end - 1, &npages);
	*(unsigned long *)data = *(unsigned long *)data + npages;

	return npages > 0;
}

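/*
 * A non-zero return tells the generic MMU notifier path that mappings were
 * removed and that the TLBs need flushing.
 */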
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable)
{
	unsigned long npages;

	npages = 0;
	return handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &npages);
}

static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				struct kvm_memory_slot *memslot, void *data)
{
	gpa_t gpa = gfn << PAGE_SHIFT;
	pte_t hva_pte = *(pte_t *)data;
	pte_t *gpa_pte = kvm_pte_for_gpa(kvm, NULL, gpa);
	pte_t old_pte;

	if (!gpa_pte)
		return 0;

	/* Mapping may need adjusting depending on memslot flags */
	old_pte = *gpa_pte;
	if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
		hva_pte = pte_mkclean(hva_pte);
	else if (memslot->flags & KVM_MEM_READONLY)
		hva_pte = pte_wrprotect(hva_pte);

	set_pte(gpa_pte, hva_pte);

	/* Replacing an absent or old page doesn't need flushes */
	if (!pte_present(old_pte) || !pte_young(old_pte))
		return 0;

	/* Pages swapped, aged, moved, or cleaned require flushes */
	return !pte_present(hva_pte) ||
	       !pte_young(hva_pte) ||
	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	int ret;

	ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
	if (ret)
		/* Flush vpid for each VCPU individually */
		kvm_flush_remote_tlbs(kvm);
	return 0;
}

static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
			       struct kvm_memory_slot *memslot, void *data)
{
	return kvm_mkold_pgd(kvm->arch.gpa_mm.pgd, gfn << PAGE_SHIFT,
				gfn_end << PAGE_SHIFT);
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				    struct kvm_memory_slot *memslot, void *data)
{
	gpa_t gpa = gfn << PAGE_SHIFT;
	pte_t *gpa_pte = kvm_pte_for_gpa(kvm, NULL, gpa);

	if (!gpa_pte)
		return 0;
	return pte_young(*gpa_pte);
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

static pud_t *kvm_get_pud(struct kvm *kvm,
		 struct kvm_mmu_memory_cache *cache, phys_addr_t addr)
{
	pgd_t *pgd;

	pgd = kvm->arch.gpa_mm.pgd + pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used yet */
		BUG();
		return NULL;
	}

	return pud_offset(p4d_offset(pgd, addr), addr);
}

static pmd_t *kvm_get_pmd(struct kvm *kvm,
		 struct kvm_mmu_memory_cache *cache, phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = kvm_get_pud(kvm, cache, addr);
	if (!pud || pud_huge(*pud))
		return NULL;

	if (pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)pmd,
				(unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, pmd);
	}

	return pmd_offset(pud, addr);
}

static int kvm_set_pmd_huge(struct kvm_vcpu *vcpu, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

retry:
	pmd = kvm_get_pmd(vcpu->kvm, cache, addr);
	VM_BUG_ON(!pmd);

	old_pmd = *pmd;
	/*
	 * Multiple vcpus faulting on the same PMD entry can lead to them
	 * sequentially updating the PMD with the same value. Following the
	 * break-before-make (pmd_clear() followed by tlb_flush()) process
	 * can hinder forward progress due to refaults generated on missing
	 * translations.
	 *
	 * Skip updating the page table if the entry is
	 * unchanged.
	 */
	if (pmd_val(old_pmd) == pmd_val(*new_pmd))
		return 0;

	if (pmd_present(old_pmd)) {
		/*
		 * If we already have PTE level mapping for this block,
		 * we must unmap it to avoid inconsistent TLB state and
		 * leaking the table page. We could end up in this situation
		 * if the memory slot was marked for dirty logging and was
		 * reverted, leaving PTE level mappings for the pages accessed
		 * during the period. So, unmap the PTE level mapping for this
		 * block and retry, as we could have released the upper level
		 * table in the process.
		 *
		 * Normal THP splits/merges follow the mmu_notifier callbacks
		 * and get handled accordingly.
		 */
		if (!pmd_huge(old_pmd)) {
			++vcpu->stat.huge_merge_exits;
			kvm_flush_gpa_pt(vcpu->kvm,
				(addr & PMD_MASK) >> PAGE_SHIFT,
				((addr & PMD_MASK) + PMD_SIZE - 1) >> PAGE_SHIFT, NULL);
			goto retry;
		}
		/*
		 * Mapping in huge pages should only happen through a
		 * fault.  If a page is merged into a transparent huge
		 * page, the individual subpages of that huge page
		 * should be unmapped through MMU notifiers before we
		 * get here.
		 *
		 * Merging of CompoundPages is not supported; they
		 * should be split first, then unmapped, merged,
		 * and mapped back in on-demand.
		 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
#endif
		pmd_clear(pmd);
	}

	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	set_pmd(pmd, *new_pmd);
	return 0;
}

/*
 * Adjust the pfn (and gpa) to the start boundary of the huge page when the
 * faulting address is backed by a transparent hugepage.
 */
static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, unsigned long *gpap)
{
	kvm_pfn_t pfn = *pfnp;
	gfn_t gfn = *gpap >> PAGE_SHIFT;
	struct page *page = pfn_to_page(pfn);

	/*
	 * PageTransCompound() returns true for both THP and hugetlbfs
	 * pages, so make sure the adjustment is done only for THP pages.
	 */
	if ((!PageHuge(page)) && PageTransCompound(page) &&
			 (atomic_read(&page->_mapcount) < 0)) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*gpap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}

static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
					unsigned long hva, bool write)
{
	gpa_t gpa_start;
	hva_t uaddr_start, uaddr_end;
	unsigned long map_size;
	size_t size;

	map_size = PMD_SIZE;
	/* Disable dirty logging on HugePages */
	if ((memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) && write)
		return false;

	size = memslot->npages * PAGE_SIZE;
	gpa_start = memslot->base_gfn << PAGE_SHIFT;
	uaddr_start = memslot->userspace_addr;
	uaddr_end = uaddr_start + size;

	/*
	 * Pages belonging to memslots that don't have the same alignment
	 * within a PMD/PUD for userspace and GPA cannot be mapped with stage-2
	 * PMD/PUD entries, because we'll end up mapping the wrong pages.
	 *
	 * Consider a layout like the following:
	 *
	 *    memslot->userspace_addr:
	 *    +-----+--------------------+--------------------+---+
	 *    |abcde|fgh  Stage-1 block  |    Stage-1 block tv|xyz|
	 *    +-----+--------------------+--------------------+---+
	 *
	 *    memslot->base_gfn << PAGE_SHIFT:
	 *      +---+--------------------+--------------------+-----+
	 *      |abc|def  Stage-2 block  |    Stage-2 block   |tvxyz|
	 *      +---+--------------------+--------------------+-----+
	 *
	 * If we create those stage-2 blocks, we'll end up with this incorrect
	 * mapping:
	 *   d -> f
	 *   e -> g
	 *   f -> h
	 */
	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	/*
	 * Next, let's make sure we're not trying to map anything not covered
	 * by the memslot. This means we have to prohibit block size mappings
	 * for the beginning and end of a non-block aligned and non-block sized
	 * memory slot (illustrated by the head and tail parts of the
	 * userspace view above containing pages 'abcde' and 'xyz',
	 * respectively).
	 *
	 * Note that it doesn't matter if we do the check using the
	 * userspace_addr or the base_gfn, as both are equally aligned (per
	 * the check above) and equally sized.
	 */
	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}

/**
 * kvm_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write:	Whether the fault was due to a write.
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * calling into KVM. This handles marking old pages young (for idle page
 * tracking), and dirtying of clean pages (for dirty page logging).
 *
 * Returns:	0 on success, in which case we can update derived mappings and
 *		resume guest execution.
 *		-EFAULT on failure due to absent GPA mapping or write to
 *		read-only page, in which case KVM must be consulted.
 */
static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
				   bool write)
{
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	pte_t *ptep;
	kvm_pfn_t pfn = 0;	/* silence bogus GCC warning */
	bool pfn_valid = false;
	int ret = 0;
	struct kvm_memory_slot *slot;

	spin_lock(&kvm->mmu_lock);

	/* Fast path - just check GPA page table for an existing entry */
	ptep = kvm_pte_for_gpa(kvm, NULL, gpa);
	if (!ptep || !pte_present(*ptep)) {
		ret = -EFAULT;
		goto out;
	}

	/* Track access to pages marked old */
	if (!pte_young(*ptep)) {
		set_pte(ptep, pte_mkyoung(*ptep));
		pfn = pte_pfn(*ptep);
		pfn_valid = true;
		/* call kvm_set_pfn_accessed() after unlock */
	}
	if (write && !pte_dirty(*ptep)) {
		if (!pte_write(*ptep)) {
			ret = -EFAULT;
			goto out;
		}

		if (kvm_pte_huge(*ptep)) {
			/*
			 * Do not set write permission when dirty logging is
			 * enabled for HugePages
			 */
			slot = gfn_to_memslot(kvm, gfn);
			if (slot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EFAULT;
				goto out;
			}
		}

		/* Track dirtying of writeable pages */
		set_pte(ptep, pte_mkdirty(*ptep));
		pfn = pte_pfn(*ptep);
		if (pmd_huge(*((pmd_t *)ptep))) {
			int i;
			gfn_t base_gfn = (gpa & PMD_MASK) >> PAGE_SHIFT;

			for (i = 0; i < PTRS_PER_PTE; i++)
				mark_page_dirty(kvm, base_gfn + i);
		} else
			mark_page_dirty(kvm, gfn);
		kvm_set_pfn_dirty(pfn);
	}

out:
	spin_unlock(&kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
	return ret;
}

/*
 * Split huge page
 */
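/*
 * A new PTE table from the vcpu's MMU page cache is filled with PTRS_PER_PTE
 * small mappings covering the old huge range, then installed in place of the
 * huge entry; the stale huge TLB entry is flushed by the caller afterwards.
 */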
static pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, pte_t *ptep, gfn_t gfn,
		struct vm_area_struct *vma, unsigned long hva)
{
	int i;
	pte_t val, *child;
	struct kvm_mmu_memory_cache *memcache;

	memcache = &vcpu->arch.mmu_page_cache;
	child = kvm_mmu_memory_cache_alloc(memcache);
	val = kvm_pte_mksmall(*ptep);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		kvm_set_pte(child + i, val);
		pte_val(val) += PAGE_SIZE;
	}

	/* The later kvm_tlb_flush_gpa() will flush the hugepage TLB entry */
	pte_val(val) = (unsigned long)child;
	kvm_set_pte(ptep, val);
	return child + (gfn & (PTRS_PER_PTE - 1));
}

/**
 * kvm_map_page() - Map a guest physical page.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write:	Whether the fault was due to a write.
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 * caller.
 *
 * Returns:	0 on success, in which case the caller may update derived
 *		mappings and resume guest execution.
 *		-EFAULT if there is no memory region at @gpa or a write was
 *		attempted to a read-only memory region. This is usually handled
 *		as an MMIO access.
 */
static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
{
	bool writeable;
	bool force_pte = false;
	bool is_iomap = false;
	int i, srcu_idx, err = 0, retry_no = 0;
	unsigned long hva;
	unsigned long mmu_seq;
	unsigned long prot_bits;
	unsigned long vma_pagesize;
	pte_t *ptep;
	kvm_pfn_t pfn;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	struct vm_area_struct *vma;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	/* Try the fast path to handle old / clean pages */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	err = kvm_map_page_fast(vcpu, gpa, write);
	if (!err)
		goto out;

	memslot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
	if (kvm_is_error_hva(hva) || (write && !writeable)) {
		err = -EFAULT;
		goto out;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	mmap_read_lock(current->mm);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		mmap_read_unlock(current->mm);
		err = -EFAULT;
		goto out;
	}

	vma_pagesize = vma_kernel_pagesize(vma);
	if ((vma_pagesize == PMD_SIZE) &&
	    !fault_supports_huge_mapping(memslot, hva, write)) {
		force_pte = true;
		vma_pagesize = PAGE_SIZE;
		++vcpu->stat.huge_dec_exits;
	}

	/* PMD is not folded, adjust gfn to the huge page boundary */
	if (vma_pagesize == PMD_SIZE)
		gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;

	/* vma is not stable once mmap_lock is dropped, so latch what we need */
	is_iomap = vma->vm_flags & (VM_IO | VM_PFNMAP);
	mmap_read_unlock(current->mm);

	/* We need a minimum of cached pages ready for page table creation */
	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (err)
		goto out;

retry:
	/*
	 * Used to check for invalidations in progress, of the pfn that is
	 * returned by gfn_to_pfn_prot() below.
	 */
	mmu_seq = kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
	 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
	 * risk the page we get a reference to getting unmapped before we have a
	 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
	 *
	 * This smp_rmb() pairs with the effective smp_wmb() of the combination
	 * of the pte_unmap_unlock() after the PTE is zapped, and the
	 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
	 * mmu_notifier_seq is incremented.
	 */
	smp_rmb();

	/* Slow path - ask KVM core whether we can access this GPA */
	pfn = gfn_to_pfn_prot(kvm, gfn, write, &writeable);
	if (is_error_noslot_pfn(pfn)) {
		err = -EFAULT;
		goto out;
	}

	spin_lock(&kvm->mmu_lock);
	/* Check if an invalidation has taken place since we got pfn */
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		/*
		 * This can happen when mappings are changed asynchronously, but
		 * also synchronously if a COW is triggered by
		 * gfn_to_pfn_prot().
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_set_pfn_accessed(pfn);
		kvm_release_pfn_clean(pfn);
		/* Yield now and then so a pending invalidation can complete */
		if (retry_no > 100) {
			retry_no = 0;
			schedule();
		}
		retry_no++;
		goto retry;
	}

	if (vma_pagesize == PAGE_SIZE && !force_pte) {
		/*
		 * Only PMD_SIZE transparent hugepages (THP) are
		 * currently supported. This code will need to be
		 * updated to support other THP sizes.
		 *
		 * Make sure the host VA and the guest IPA are sufficiently
		 * aligned and that the block is contained within the memslot.
		 */
		++vcpu->stat.huge_thp_exits;
		if (fault_supports_huge_mapping(memslot, hva, write) &&
		    transparent_hugepage_adjust(&pfn, &gpa)) {
			++vcpu->stat.huge_adjust_exits;
			vma_pagesize = PMD_SIZE;
		}
	}

	/* Set up the prot bits */
	prot_bits = _PAGE_PRESENT | __READABLE;
	if (is_iomap)
		prot_bits |= _CACHE_SUC;
	else
		prot_bits |= _CACHE_CC;

	if (writeable) {
		prot_bits |= _PAGE_WRITE;
		if (write) {
			prot_bits |= __WRITEABLE;
			mark_page_dirty(kvm, gfn);
			kvm_set_pfn_dirty(pfn);
		}
	}
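
	/*
	 * Illustrative prot_bits outcomes of the two blocks above (bit
	 * names as used in this file; encodings are arch-specific):
	 *
	 *	read fault, cached RAM:  _PAGE_PRESENT | __READABLE | _CACHE_CC
	 *	write fault, writeable:  the above | _PAGE_WRITE | __WRITEABLE
	 *	VM_IO/VM_PFNMAP backing: _CACHE_SUC instead of _CACHE_CC
	 */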

	if (vma_pagesize == PMD_SIZE) {
		pmd_t new_pmd = pfn_pmd(pfn, __pgprot(prot_bits));

		new_pmd = kvm_pmd_mkhuge(new_pmd);
		if (writeable && write) {
			gfn_t base_gfn = (gpa & PMD_MASK) >> PAGE_SHIFT;

			for (i = 0; i < PTRS_PER_PTE; i++)
				mark_page_dirty(kvm, base_gfn + i);
		}

		++vcpu->stat.huge_set_exits;
		kvm_set_pmd_huge(vcpu, memcache, gpa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, __pgprot(prot_bits));

		if (writeable && write)
			mark_page_dirty(kvm, gfn);

		/* Ensure page tables are allocated */
		ptep = kvm_pte_for_gpa(kvm, memcache, gpa);
		if (ptep && kvm_pte_huge(*ptep) && write)
			ptep = kvm_split_huge(vcpu, ptep, gfn, vma, hva);

		kvm_set_pte(ptep, new_pte);
		err = 0;
	}

	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	kvm_set_pfn_accessed(pfn);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return err;
}

int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write)
{
	int ret;

	ret = kvm_map_page(vcpu, badv, write);
	if (ret)
		return ret;

	/* Invalidate this entry in the TLB */
	return kvm_tlb_flush_gpa(vcpu, badv);
}
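
/*
 * A minimal caller sketch (hypothetical, not part of this file): a guest
 * TLB-load exit handler could funnel the faulting address through
 * kvm_handle_mm_fault() and either re-enter the guest or fall back to
 * MMIO emulation on failure, e.g.:
 *
 *	if (!kvm_handle_mm_fault(vcpu, badv, false))
 *		return RESUME_GUEST;	// mapping established
 *	return RESUME_HOST;		// e.g. treat as an MMIO access
 */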

/**
 * kvm_flush_tlb_all() - Flush all root TLB entries for guests.
 *
 * Invalidate all entries including GVA-->GPA and GPA-->HPA mappings.
 */
void kvm_flush_tlb_all(void)
{
	unsigned long flags;

	local_irq_save(flags);
	invtlb_all(INVTLB_ALLGID, 0, 0);
	local_irq_restore(flags);
}