162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci/* 462306a36Sopenharmony_ci * Xen mmu operations 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file contains the various mmu fetch and update operations. 762306a36Sopenharmony_ci * The most important job they must perform is the mapping between the 862306a36Sopenharmony_ci * domain's pfn and the overall machine mfns. 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Xen allows guests to directly update the pagetable, in a controlled 1162306a36Sopenharmony_ci * fashion. In other words, the guest modifies the same pagetable 1262306a36Sopenharmony_ci * that the CPU actually uses, which eliminates the overhead of having 1362306a36Sopenharmony_ci * a separate shadow pagetable. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * In order to allow this, it falls on the guest domain to map its 1662306a36Sopenharmony_ci * notion of a "physical" pfn - which is just a domain-local linear 1762306a36Sopenharmony_ci * address - into a real "machine address" which the CPU's MMU can 1862306a36Sopenharmony_ci * use. 1962306a36Sopenharmony_ci * 2062306a36Sopenharmony_ci * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be 2162306a36Sopenharmony_ci * inserted directly into the pagetable. When creating a new 2262306a36Sopenharmony_ci * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely, 2362306a36Sopenharmony_ci * when reading the content back with __(pgd|pmd|pte)_val, it converts 2462306a36Sopenharmony_ci * the mfn back into a pfn. 2562306a36Sopenharmony_ci * 2662306a36Sopenharmony_ci * The other constraint is that all pages which make up a pagetable 2762306a36Sopenharmony_ci * must be mapped read-only in the guest. This prevents uncontrolled 2862306a36Sopenharmony_ci * guest updates to the pagetable. 
Xen strictly enforces this, and 2962306a36Sopenharmony_ci * will disallow any pagetable update which will end up mapping a 3062306a36Sopenharmony_ci * pagetable page RW, and will disallow using any writable page as a 3162306a36Sopenharmony_ci * pagetable. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * Naively, when loading %cr3 with the base of a new pagetable, Xen 3462306a36Sopenharmony_ci * would need to validate the whole pagetable before going on. 3562306a36Sopenharmony_ci * Naturally, this is quite slow. The solution is to "pin" a 3662306a36Sopenharmony_ci * pagetable, which enforces all the constraints on the pagetable even 3762306a36Sopenharmony_ci * when it is not actively in use. This means that Xen can be assured 3862306a36Sopenharmony_ci * that it is still valid when you do load it into %cr3, and doesn't 3962306a36Sopenharmony_ci * need to revalidate it. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ci#include <linux/sched/mm.h> 4462306a36Sopenharmony_ci#include <linux/debugfs.h> 4562306a36Sopenharmony_ci#include <linux/bug.h> 4662306a36Sopenharmony_ci#include <linux/vmalloc.h> 4762306a36Sopenharmony_ci#include <linux/export.h> 4862306a36Sopenharmony_ci#include <linux/init.h> 4962306a36Sopenharmony_ci#include <linux/gfp.h> 5062306a36Sopenharmony_ci#include <linux/memblock.h> 5162306a36Sopenharmony_ci#include <linux/seq_file.h> 5262306a36Sopenharmony_ci#include <linux/crash_dump.h> 5362306a36Sopenharmony_ci#include <linux/pgtable.h> 5462306a36Sopenharmony_ci#ifdef CONFIG_KEXEC_CORE 5562306a36Sopenharmony_ci#include <linux/kexec.h> 5662306a36Sopenharmony_ci#endif 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci#include <trace/events/xen.h> 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci#include <asm/tlbflush.h> 6162306a36Sopenharmony_ci#include <asm/fixmap.h> 6262306a36Sopenharmony_ci#include <asm/mmu_context.h>
6362306a36Sopenharmony_ci#include <asm/setup.h> 6462306a36Sopenharmony_ci#include <asm/paravirt.h> 6562306a36Sopenharmony_ci#include <asm/e820/api.h> 6662306a36Sopenharmony_ci#include <asm/linkage.h> 6762306a36Sopenharmony_ci#include <asm/page.h> 6862306a36Sopenharmony_ci#include <asm/init.h> 6962306a36Sopenharmony_ci#include <asm/memtype.h> 7062306a36Sopenharmony_ci#include <asm/smp.h> 7162306a36Sopenharmony_ci#include <asm/tlb.h> 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci#include <asm/xen/hypercall.h> 7462306a36Sopenharmony_ci#include <asm/xen/hypervisor.h> 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci#include <xen/xen.h> 7762306a36Sopenharmony_ci#include <xen/page.h> 7862306a36Sopenharmony_ci#include <xen/interface/xen.h> 7962306a36Sopenharmony_ci#include <xen/interface/hvm/hvm_op.h> 8062306a36Sopenharmony_ci#include <xen/interface/version.h> 8162306a36Sopenharmony_ci#include <xen/interface/memory.h> 8262306a36Sopenharmony_ci#include <xen/hvc-console.h> 8362306a36Sopenharmony_ci#include <xen/swiotlb-xen.h> 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#include "multicalls.h" 8662306a36Sopenharmony_ci#include "mmu.h" 8762306a36Sopenharmony_ci#include "debugfs.h" 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci/* 9062306a36Sopenharmony_ci * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order 9162306a36Sopenharmony_ci * to avoid warnings with "-Wmissing-prototypes". 
9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_cipteval_t xen_pte_val(pte_t pte); 9462306a36Sopenharmony_cipgdval_t xen_pgd_val(pgd_t pgd); 9562306a36Sopenharmony_cipmdval_t xen_pmd_val(pmd_t pmd); 9662306a36Sopenharmony_cipudval_t xen_pud_val(pud_t pud); 9762306a36Sopenharmony_cip4dval_t xen_p4d_val(p4d_t p4d); 9862306a36Sopenharmony_cipte_t xen_make_pte(pteval_t pte); 9962306a36Sopenharmony_cipgd_t xen_make_pgd(pgdval_t pgd); 10062306a36Sopenharmony_cipmd_t xen_make_pmd(pmdval_t pmd); 10162306a36Sopenharmony_cipud_t xen_make_pud(pudval_t pud); 10262306a36Sopenharmony_cip4d_t xen_make_p4d(p4dval_t p4d); 10362306a36Sopenharmony_cipte_t xen_make_pte_init(pteval_t pte); 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 10662306a36Sopenharmony_ci/* l3 pud for userspace vsyscall mapping */ 10762306a36Sopenharmony_cistatic pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; 10862306a36Sopenharmony_ci#endif 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci/* 11162306a36Sopenharmony_ci * Protects atomic reservation decrease/increase against concurrent increases. 11262306a36Sopenharmony_ci * Also protects non-atomic updates of current_pages and balloon lists. 11362306a36Sopenharmony_ci */ 11462306a36Sopenharmony_cistatic DEFINE_SPINLOCK(xen_reservation_lock); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci/* 11762306a36Sopenharmony_ci * Note about cr3 (pagetable base) values: 11862306a36Sopenharmony_ci * 11962306a36Sopenharmony_ci * xen_cr3 contains the current logical cr3 value; it contains the 12062306a36Sopenharmony_ci * last set cr3. This may not be the current effective cr3, because 12162306a36Sopenharmony_ci * its update may be being lazily deferred. However, a vcpu looking 12262306a36Sopenharmony_ci * at its own cr3 can use this value knowing that everything will 12362306a36Sopenharmony_ci * be self-consistent.
12462306a36Sopenharmony_ci * 12562306a36Sopenharmony_ci * xen_current_cr3 contains the actual vcpu cr3; it is set once the 12662306a36Sopenharmony_ci * hypercall to set the vcpu cr3 is complete (so it may be a little 12762306a36Sopenharmony_ci * out of date, but it will never be set early). If one vcpu is 12862306a36Sopenharmony_ci * looking at another vcpu's cr3 value, it should use this variable. 12962306a36Sopenharmony_ci */ 13062306a36Sopenharmony_ciDEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ 13162306a36Sopenharmony_ciDEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic phys_addr_t xen_pt_base, xen_pt_size __initdata; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready); 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci/* 13862306a36Sopenharmony_ci * Just beyond the highest usermode address. STACK_TOP_MAX has a 13962306a36Sopenharmony_ci * redzone above it, so round it up to a PGD boundary. 
14062306a36Sopenharmony_ci */ 14162306a36Sopenharmony_ci#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_civoid make_lowmem_page_readonly(void *vaddr) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci pte_t *pte, ptev; 14662306a36Sopenharmony_ci unsigned long address = (unsigned long)vaddr; 14762306a36Sopenharmony_ci unsigned int level; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci pte = lookup_address(address, &level); 15062306a36Sopenharmony_ci if (pte == NULL) 15162306a36Sopenharmony_ci return; /* vaddr missing */ 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci ptev = pte_wrprotect(*pte); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping(address, ptev, 0)) 15662306a36Sopenharmony_ci BUG(); 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_civoid make_lowmem_page_readwrite(void *vaddr) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci pte_t *pte, ptev; 16262306a36Sopenharmony_ci unsigned long address = (unsigned long)vaddr; 16362306a36Sopenharmony_ci unsigned int level; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci pte = lookup_address(address, &level); 16662306a36Sopenharmony_ci if (pte == NULL) 16762306a36Sopenharmony_ci return; /* vaddr missing */ 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci ptev = pte_mkwrite_novma(*pte); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping(address, ptev, 0)) 17262306a36Sopenharmony_ci BUG(); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci/* 17762306a36Sopenharmony_ci * During early boot all page table pages are pinned, but we do not have struct 17862306a36Sopenharmony_ci * pages, so return true until struct pages are ready. 
17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_cistatic bool xen_page_pinned(void *ptr) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci if (static_branch_likely(&xen_struct_pages_ready)) { 18362306a36Sopenharmony_ci struct page *page = virt_to_page(ptr); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci return PagePinned(page); 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci return true; 18862306a36Sopenharmony_ci} 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_cistatic void xen_extend_mmu_update(const struct mmu_update *update) 19162306a36Sopenharmony_ci{ 19262306a36Sopenharmony_ci struct multicall_space mcs; 19362306a36Sopenharmony_ci struct mmu_update *u; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci if (mcs.mc != NULL) { 19862306a36Sopenharmony_ci mcs.mc->args[1]++; 19962306a36Sopenharmony_ci } else { 20062306a36Sopenharmony_ci mcs = __xen_mc_entry(sizeof(*u)); 20162306a36Sopenharmony_ci MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci u = mcs.args; 20562306a36Sopenharmony_ci *u = *update; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cistatic void xen_extend_mmuext_op(const struct mmuext_op *op) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci struct multicall_space mcs; 21162306a36Sopenharmony_ci struct mmuext_op *u; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u)); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (mcs.mc != NULL) { 21662306a36Sopenharmony_ci mcs.mc->args[1]++; 21762306a36Sopenharmony_ci } else { 21862306a36Sopenharmony_ci mcs = __xen_mc_entry(sizeof(*u)); 21962306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); 22062306a36Sopenharmony_ci } 
22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci u = mcs.args; 22362306a36Sopenharmony_ci *u = *op; 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct mmu_update u; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci preempt_disable(); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci xen_mc_batch(); 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci /* ptr may be ioremapped for 64-bit pagetable setup */ 23562306a36Sopenharmony_ci u.ptr = arbitrary_virt_to_machine(ptr).maddr; 23662306a36Sopenharmony_ci u.val = pmd_val_ma(val); 23762306a36Sopenharmony_ci xen_extend_mmu_update(&u); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci preempt_enable(); 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_cistatic void xen_set_pmd(pmd_t *ptr, pmd_t val) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci trace_xen_mmu_set_pmd(ptr, val); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci /* If page is not pinned, we can just update the entry 24962306a36Sopenharmony_ci directly */ 25062306a36Sopenharmony_ci if (!xen_page_pinned(ptr)) { 25162306a36Sopenharmony_ci *ptr = val; 25262306a36Sopenharmony_ci return; 25362306a36Sopenharmony_ci } 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci xen_set_pmd_hyper(ptr, val); 25662306a36Sopenharmony_ci} 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci/* 25962306a36Sopenharmony_ci * Associate a virtual page frame with a given physical page frame 26062306a36Sopenharmony_ci * and protection flags for that frame. 
26162306a36Sopenharmony_ci */ 26262306a36Sopenharmony_civoid __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) 26362306a36Sopenharmony_ci{ 26462306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags), 26562306a36Sopenharmony_ci UVMF_INVLPG)) 26662306a36Sopenharmony_ci BUG(); 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct mmu_update u; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (xen_get_lazy_mode() != XEN_LAZY_MMU) 27462306a36Sopenharmony_ci return false; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci xen_mc_batch(); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; 27962306a36Sopenharmony_ci u.val = pte_val_ma(pteval); 28062306a36Sopenharmony_ci xen_extend_mmu_update(&u); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci return true; 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cistatic inline void __xen_set_pte(pte_t *ptep, pte_t pteval) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci if (!xen_batched_set_pte(ptep, pteval)) { 29062306a36Sopenharmony_ci /* 29162306a36Sopenharmony_ci * Could call native_set_pte() here and trap and 29262306a36Sopenharmony_ci * emulate the PTE write, but a hypercall is much cheaper. 
29362306a36Sopenharmony_ci */ 29462306a36Sopenharmony_ci struct mmu_update u; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; 29762306a36Sopenharmony_ci u.val = pte_val_ma(pteval); 29862306a36Sopenharmony_ci HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_cistatic void xen_set_pte(pte_t *ptep, pte_t pteval) 30362306a36Sopenharmony_ci{ 30462306a36Sopenharmony_ci trace_xen_mmu_set_pte(ptep, pteval); 30562306a36Sopenharmony_ci __xen_set_pte(ptep, pteval); 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cipte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma, 30962306a36Sopenharmony_ci unsigned long addr, pte_t *ptep) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci /* Just return the pte as-is. We preserve the bits on commit */ 31262306a36Sopenharmony_ci trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep); 31362306a36Sopenharmony_ci return *ptep; 31462306a36Sopenharmony_ci} 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_civoid xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, 31762306a36Sopenharmony_ci pte_t *ptep, pte_t pte) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci struct mmu_update u; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte); 32262306a36Sopenharmony_ci xen_mc_batch(); 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; 32562306a36Sopenharmony_ci u.val = pte_val_ma(pte); 32662306a36Sopenharmony_ci xen_extend_mmu_update(&u); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci/* Assume pteval_t is equivalent to all the other *val_t types. 
*/ 33262306a36Sopenharmony_cistatic pteval_t pte_mfn_to_pfn(pteval_t val) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci if (val & _PAGE_PRESENT) { 33562306a36Sopenharmony_ci unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT; 33662306a36Sopenharmony_ci unsigned long pfn = mfn_to_pfn(mfn); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci pteval_t flags = val & PTE_FLAGS_MASK; 33962306a36Sopenharmony_ci if (unlikely(pfn == ~0)) 34062306a36Sopenharmony_ci val = flags & ~_PAGE_PRESENT; 34162306a36Sopenharmony_ci else 34262306a36Sopenharmony_ci val = ((pteval_t)pfn << PAGE_SHIFT) | flags; 34362306a36Sopenharmony_ci } 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci return val; 34662306a36Sopenharmony_ci} 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_cistatic pteval_t pte_pfn_to_mfn(pteval_t val) 34962306a36Sopenharmony_ci{ 35062306a36Sopenharmony_ci if (val & _PAGE_PRESENT) { 35162306a36Sopenharmony_ci unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 35262306a36Sopenharmony_ci pteval_t flags = val & PTE_FLAGS_MASK; 35362306a36Sopenharmony_ci unsigned long mfn; 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci mfn = __pfn_to_mfn(pfn); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci /* 35862306a36Sopenharmony_ci * If there's no mfn for the pfn, then just create an 35962306a36Sopenharmony_ci * empty non-present pte. Unfortunately this loses 36062306a36Sopenharmony_ci * information about the original pfn, so 36162306a36Sopenharmony_ci * pte_mfn_to_pfn is asymmetric. 
36262306a36Sopenharmony_ci */ 36362306a36Sopenharmony_ci if (unlikely(mfn == INVALID_P2M_ENTRY)) { 36462306a36Sopenharmony_ci mfn = 0; 36562306a36Sopenharmony_ci flags = 0; 36662306a36Sopenharmony_ci } else 36762306a36Sopenharmony_ci mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); 36862306a36Sopenharmony_ci val = ((pteval_t)mfn << PAGE_SHIFT) | flags; 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci return val; 37262306a36Sopenharmony_ci} 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci__visible pteval_t xen_pte_val(pte_t pte) 37562306a36Sopenharmony_ci{ 37662306a36Sopenharmony_ci pteval_t pteval = pte.pte; 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci return pte_mfn_to_pfn(pteval); 37962306a36Sopenharmony_ci} 38062306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci__visible pgdval_t xen_pgd_val(pgd_t pgd) 38362306a36Sopenharmony_ci{ 38462306a36Sopenharmony_ci return pte_mfn_to_pfn(pgd.pgd); 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci__visible pte_t xen_make_pte(pteval_t pte) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci pte = pte_pfn_to_mfn(pte); 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci return native_make_pte(pte); 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci__visible pgd_t xen_make_pgd(pgdval_t pgd) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci pgd = pte_pfn_to_mfn(pgd); 39962306a36Sopenharmony_ci return native_make_pgd(pgd); 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci__visible pmdval_t xen_pmd_val(pmd_t pmd) 40462306a36Sopenharmony_ci{ 40562306a36Sopenharmony_ci return pte_mfn_to_pfn(pmd.pmd); 
40662306a36Sopenharmony_ci} 40762306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_cistatic void xen_set_pud_hyper(pud_t *ptr, pud_t val) 41062306a36Sopenharmony_ci{ 41162306a36Sopenharmony_ci struct mmu_update u; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci preempt_disable(); 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci xen_mc_batch(); 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci /* ptr may be ioremapped for 64-bit pagetable setup */ 41862306a36Sopenharmony_ci u.ptr = arbitrary_virt_to_machine(ptr).maddr; 41962306a36Sopenharmony_ci u.val = pud_val_ma(val); 42062306a36Sopenharmony_ci xen_extend_mmu_update(&u); 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci preempt_enable(); 42562306a36Sopenharmony_ci} 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_cistatic void xen_set_pud(pud_t *ptr, pud_t val) 42862306a36Sopenharmony_ci{ 42962306a36Sopenharmony_ci trace_xen_mmu_set_pud(ptr, val); 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci /* If page is not pinned, we can just update the entry 43262306a36Sopenharmony_ci directly */ 43362306a36Sopenharmony_ci if (!xen_page_pinned(ptr)) { 43462306a36Sopenharmony_ci *ptr = val; 43562306a36Sopenharmony_ci return; 43662306a36Sopenharmony_ci } 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci xen_set_pud_hyper(ptr, val); 43962306a36Sopenharmony_ci} 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci__visible pmd_t xen_make_pmd(pmdval_t pmd) 44262306a36Sopenharmony_ci{ 44362306a36Sopenharmony_ci pmd = pte_pfn_to_mfn(pmd); 44462306a36Sopenharmony_ci return native_make_pmd(pmd); 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci__visible pudval_t xen_pud_val(pud_t pud) 44962306a36Sopenharmony_ci{ 45062306a36Sopenharmony_ci return 
pte_mfn_to_pfn(pud.pud); 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci__visible pud_t xen_make_pud(pudval_t pud) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci pud = pte_pfn_to_mfn(pud); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci return native_make_pud(pud); 45962306a36Sopenharmony_ci} 46062306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_cistatic pgd_t *xen_get_user_pgd(pgd_t *pgd) 46362306a36Sopenharmony_ci{ 46462306a36Sopenharmony_ci pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); 46562306a36Sopenharmony_ci unsigned offset = pgd - pgd_page; 46662306a36Sopenharmony_ci pgd_t *user_ptr = NULL; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci if (offset < pgd_index(USER_LIMIT)) { 46962306a36Sopenharmony_ci struct page *page = virt_to_page(pgd_page); 47062306a36Sopenharmony_ci user_ptr = (pgd_t *)page->private; 47162306a36Sopenharmony_ci if (user_ptr) 47262306a36Sopenharmony_ci user_ptr += offset; 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci return user_ptr; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci struct mmu_update u; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci u.ptr = virt_to_machine(ptr).maddr; 48362306a36Sopenharmony_ci u.val = p4d_val_ma(val); 48462306a36Sopenharmony_ci xen_extend_mmu_update(&u); 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci/* 48862306a36Sopenharmony_ci * Raw hypercall-based set_p4d, intended for use in early boot before 48962306a36Sopenharmony_ci * there's a page structure. This implies: 49062306a36Sopenharmony_ci * 1. The only existing pagetable is the kernel's 49162306a36Sopenharmony_ci * 2.
It is always pinned 49262306a36Sopenharmony_ci * 3. It has no user pagetable attached to it 49362306a36Sopenharmony_ci */ 49462306a36Sopenharmony_cistatic void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci preempt_disable(); 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci xen_mc_batch(); 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci __xen_set_p4d_hyper(ptr, val); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci preempt_enable(); 50562306a36Sopenharmony_ci} 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_cistatic void xen_set_p4d(p4d_t *ptr, p4d_t val) 50862306a36Sopenharmony_ci{ 50962306a36Sopenharmony_ci pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr); 51062306a36Sopenharmony_ci pgd_t pgd_val; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val); 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci /* If page is not pinned, we can just update the entry 51562306a36Sopenharmony_ci directly */ 51662306a36Sopenharmony_ci if (!xen_page_pinned(ptr)) { 51762306a36Sopenharmony_ci *ptr = val; 51862306a36Sopenharmony_ci if (user_ptr) { 51962306a36Sopenharmony_ci WARN_ON(xen_page_pinned(user_ptr)); 52062306a36Sopenharmony_ci pgd_val.pgd = p4d_val_ma(val); 52162306a36Sopenharmony_ci *user_ptr = pgd_val; 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci return; 52462306a36Sopenharmony_ci } 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci /* If it's pinned, then we can at least batch the kernel and 52762306a36Sopenharmony_ci user updates together. 
*/ 52862306a36Sopenharmony_ci xen_mc_batch(); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci __xen_set_p4d_hyper(ptr, val); 53162306a36Sopenharmony_ci if (user_ptr) 53262306a36Sopenharmony_ci __xen_set_p4d_hyper((p4d_t *)user_ptr, val); 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 53562306a36Sopenharmony_ci} 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci#if CONFIG_PGTABLE_LEVELS >= 5 53862306a36Sopenharmony_ci__visible p4dval_t xen_p4d_val(p4d_t p4d) 53962306a36Sopenharmony_ci{ 54062306a36Sopenharmony_ci return pte_mfn_to_pfn(p4d.p4d); 54162306a36Sopenharmony_ci} 54262306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val); 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci__visible p4d_t xen_make_p4d(p4dval_t p4d) 54562306a36Sopenharmony_ci{ 54662306a36Sopenharmony_ci p4d = pte_pfn_to_mfn(p4d); 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci return native_make_p4d(p4d); 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); 55162306a36Sopenharmony_ci#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_cistatic void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, 55462306a36Sopenharmony_ci void (*func)(struct mm_struct *mm, struct page *, 55562306a36Sopenharmony_ci enum pt_level), 55662306a36Sopenharmony_ci bool last, unsigned long limit) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci int i, nr; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci nr = last ? 
pmd_index(limit) + 1 : PTRS_PER_PMD; 56162306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 56262306a36Sopenharmony_ci if (!pmd_none(pmd[i])) 56362306a36Sopenharmony_ci (*func)(mm, pmd_page(pmd[i]), PT_PTE); 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistatic void xen_pud_walk(struct mm_struct *mm, pud_t *pud, 56862306a36Sopenharmony_ci void (*func)(struct mm_struct *mm, struct page *, 56962306a36Sopenharmony_ci enum pt_level), 57062306a36Sopenharmony_ci bool last, unsigned long limit) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci int i, nr; 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD; 57562306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 57662306a36Sopenharmony_ci pmd_t *pmd; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci if (pud_none(pud[i])) 57962306a36Sopenharmony_ci continue; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci pmd = pmd_offset(&pud[i], 0); 58262306a36Sopenharmony_ci if (PTRS_PER_PMD > 1) 58362306a36Sopenharmony_ci (*func)(mm, virt_to_page(pmd), PT_PMD); 58462306a36Sopenharmony_ci xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit); 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, 58962306a36Sopenharmony_ci void (*func)(struct mm_struct *mm, struct page *, 59062306a36Sopenharmony_ci enum pt_level), 59162306a36Sopenharmony_ci bool last, unsigned long limit) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci pud_t *pud; 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (p4d_none(*p4d)) 59762306a36Sopenharmony_ci return; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci pud = pud_offset(p4d, 0); 60062306a36Sopenharmony_ci if (PTRS_PER_PUD > 1) 60162306a36Sopenharmony_ci (*func)(mm, virt_to_page(pud), PT_PUD); 60262306a36Sopenharmony_ci 
xen_pud_walk(mm, pud, func, last, limit); 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci/* 60662306a36Sopenharmony_ci * (Yet another) pagetable walker. This one is intended for pinning a 60762306a36Sopenharmony_ci * pagetable. This means that it walks a pagetable and calls the 60862306a36Sopenharmony_ci * callback function on each page it finds making up the page table, 60962306a36Sopenharmony_ci * at every level. It walks the entire pagetable, but it only bothers 61062306a36Sopenharmony_ci * pinning pte pages which are below limit. In the normal case this 61162306a36Sopenharmony_ci * will be STACK_TOP_MAX, but at boot we need to pin up to 61262306a36Sopenharmony_ci * FIXADDR_TOP. 61362306a36Sopenharmony_ci * 61462306a36Sopenharmony_ci * We must skip the Xen hole in the middle of the address space, just after 61562306a36Sopenharmony_ci * the big x86-64 virtual hole. 61662306a36Sopenharmony_ci */ 61762306a36Sopenharmony_cistatic void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, 61862306a36Sopenharmony_ci void (*func)(struct mm_struct *mm, struct page *, 61962306a36Sopenharmony_ci enum pt_level), 62062306a36Sopenharmony_ci unsigned long limit) 62162306a36Sopenharmony_ci{ 62262306a36Sopenharmony_ci int i, nr; 62362306a36Sopenharmony_ci unsigned hole_low = 0, hole_high = 0; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci /* The limit is the last byte to be touched */ 62662306a36Sopenharmony_ci limit--; 62762306a36Sopenharmony_ci BUG_ON(limit >= FIXADDR_TOP); 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci /* 63062306a36Sopenharmony_ci * 64-bit has a great big hole in the middle of the address 63162306a36Sopenharmony_ci * space, which contains the Xen mappings. 
63262306a36Sopenharmony_ci */ 63362306a36Sopenharmony_ci hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); 63462306a36Sopenharmony_ci hole_high = pgd_index(GUARD_HOLE_END_ADDR); 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci nr = pgd_index(limit) + 1; 63762306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 63862306a36Sopenharmony_ci p4d_t *p4d; 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci if (i >= hole_low && i < hole_high) 64162306a36Sopenharmony_ci continue; 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci if (pgd_none(pgd[i])) 64462306a36Sopenharmony_ci continue; 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci p4d = p4d_offset(&pgd[i], 0); 64762306a36Sopenharmony_ci xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); 64862306a36Sopenharmony_ci } 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci /* Do the top level last, so that the callbacks can use it as 65162306a36Sopenharmony_ci a cue to do final things like tlb flushes. */ 65262306a36Sopenharmony_ci (*func)(mm, virt_to_page(pgd), PT_PGD); 65362306a36Sopenharmony_ci} 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_cistatic void xen_pgd_walk(struct mm_struct *mm, 65662306a36Sopenharmony_ci void (*func)(struct mm_struct *mm, struct page *, 65762306a36Sopenharmony_ci enum pt_level), 65862306a36Sopenharmony_ci unsigned long limit) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci __xen_pgd_walk(mm, mm->pgd, func, limit); 66162306a36Sopenharmony_ci} 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci/* If we're using split pte locks, then take the page's lock and 66462306a36Sopenharmony_ci return a pointer to it. Otherwise return NULL. 
*/ 66562306a36Sopenharmony_cistatic spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci spinlock_t *ptl = NULL; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci#if USE_SPLIT_PTE_PTLOCKS 67062306a36Sopenharmony_ci ptl = ptlock_ptr(page_ptdesc(page)); 67162306a36Sopenharmony_ci spin_lock_nest_lock(ptl, &mm->page_table_lock); 67262306a36Sopenharmony_ci#endif 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci return ptl; 67562306a36Sopenharmony_ci} 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_cistatic void xen_pte_unlock(void *v) 67862306a36Sopenharmony_ci{ 67962306a36Sopenharmony_ci spinlock_t *ptl = v; 68062306a36Sopenharmony_ci spin_unlock(ptl); 68162306a36Sopenharmony_ci} 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_cistatic void xen_do_pin(unsigned level, unsigned long pfn) 68462306a36Sopenharmony_ci{ 68562306a36Sopenharmony_ci struct mmuext_op op; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci op.cmd = level; 68862306a36Sopenharmony_ci op.arg1.mfn = pfn_to_mfn(pfn); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci xen_extend_mmuext_op(&op); 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_cistatic void xen_pin_page(struct mm_struct *mm, struct page *page, 69462306a36Sopenharmony_ci enum pt_level level) 69562306a36Sopenharmony_ci{ 69662306a36Sopenharmony_ci unsigned pgfl = TestSetPagePinned(page); 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci if (!pgfl) { 69962306a36Sopenharmony_ci void *pt = lowmem_page_address(page); 70062306a36Sopenharmony_ci unsigned long pfn = page_to_pfn(page); 70162306a36Sopenharmony_ci struct multicall_space mcs = __xen_mc_entry(0); 70262306a36Sopenharmony_ci spinlock_t *ptl; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* 70562306a36Sopenharmony_ci * We need to hold the pagetable lock between the time 70662306a36Sopenharmony_ci * we make the pagetable RO and when we actually pin 
70762306a36Sopenharmony_ci * it. If we don't, then other users may come in and 70862306a36Sopenharmony_ci * attempt to update the pagetable by writing it, 70962306a36Sopenharmony_ci * which will fail because the memory is RO but not 71062306a36Sopenharmony_ci * pinned, so Xen won't do the trap'n'emulate. 71162306a36Sopenharmony_ci * 71262306a36Sopenharmony_ci * If we're using split pte locks, we can't hold the 71362306a36Sopenharmony_ci * entire pagetable's worth of locks during the 71462306a36Sopenharmony_ci * traverse, because we may wrap the preempt count (8 71562306a36Sopenharmony_ci * bits). The solution is to mark RO and pin each PTE 71662306a36Sopenharmony_ci * page while holding the lock. This means the number 71762306a36Sopenharmony_ci * of locks we end up holding is never more than a 71862306a36Sopenharmony_ci * batch size (~32 entries, at present). 71962306a36Sopenharmony_ci * 72062306a36Sopenharmony_ci * If we're not using split pte locks, we needn't pin 72162306a36Sopenharmony_ci * the PTE pages independently, because we're 72262306a36Sopenharmony_ci * protected by the overall pagetable lock. 72362306a36Sopenharmony_ci */ 72462306a36Sopenharmony_ci ptl = NULL; 72562306a36Sopenharmony_ci if (level == PT_PTE) 72662306a36Sopenharmony_ci ptl = xen_pte_lock(page, mm); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, 72962306a36Sopenharmony_ci pfn_pte(pfn, PAGE_KERNEL_RO), 73062306a36Sopenharmony_ci level == PT_PGD ? UVMF_TLB_FLUSH : 0); 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci if (ptl) { 73362306a36Sopenharmony_ci xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci /* Queue a deferred unlock for when this batch 73662306a36Sopenharmony_ci is completed. 
*/ 73762306a36Sopenharmony_ci xen_mc_callback(xen_pte_unlock, ptl); 73862306a36Sopenharmony_ci } 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci} 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci/* This is called just after a mm has been created, but it has not 74362306a36Sopenharmony_ci been used yet. We need to make sure that its pagetable is all 74462306a36Sopenharmony_ci read-only, and can be pinned. */ 74562306a36Sopenharmony_cistatic void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci pgd_t *user_pgd = xen_get_user_pgd(pgd); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci trace_xen_mmu_pgd_pin(mm, pgd); 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci xen_mc_batch(); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT); 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci if (user_pgd) { 75862306a36Sopenharmony_ci xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); 75962306a36Sopenharmony_ci xen_do_pin(MMUEXT_PIN_L4_TABLE, 76062306a36Sopenharmony_ci PFN_DOWN(__pa(user_pgd))); 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci xen_mc_issue(0); 76462306a36Sopenharmony_ci} 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_cistatic void xen_pgd_pin(struct mm_struct *mm) 76762306a36Sopenharmony_ci{ 76862306a36Sopenharmony_ci __xen_pgd_pin(mm, mm->pgd); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci/* 77262306a36Sopenharmony_ci * On save, we need to pin all pagetables to make sure they get their 77362306a36Sopenharmony_ci * mfns turned into pfns. Search the list for any unpinned pgds and pin 77462306a36Sopenharmony_ci * them (unpinned pgds are not currently in use, probably because the 77562306a36Sopenharmony_ci * process is under construction or destruction). 
77662306a36Sopenharmony_ci * 77762306a36Sopenharmony_ci * Expected to be called in stop_machine() ("equivalent to taking 77862306a36Sopenharmony_ci * every spinlock in the system"), so the locking doesn't really 77962306a36Sopenharmony_ci * matter all that much. 78062306a36Sopenharmony_ci */ 78162306a36Sopenharmony_civoid xen_mm_pin_all(void) 78262306a36Sopenharmony_ci{ 78362306a36Sopenharmony_ci struct page *page; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci spin_lock(&pgd_lock); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci list_for_each_entry(page, &pgd_list, lru) { 78862306a36Sopenharmony_ci if (!PagePinned(page)) { 78962306a36Sopenharmony_ci __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page)); 79062306a36Sopenharmony_ci SetPageSavePinned(page); 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci } 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci spin_unlock(&pgd_lock); 79562306a36Sopenharmony_ci} 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_cistatic void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, 79862306a36Sopenharmony_ci enum pt_level level) 79962306a36Sopenharmony_ci{ 80062306a36Sopenharmony_ci SetPagePinned(page); 80162306a36Sopenharmony_ci} 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci/* 80462306a36Sopenharmony_ci * The init_mm pagetable is really pinned as soon as its created, but 80562306a36Sopenharmony_ci * that's before we have page structures to store the bits. So do all 80662306a36Sopenharmony_ci * the book-keeping now once struct pages for allocated pages are 80762306a36Sopenharmony_ci * initialized. This happens only after memblock_free_all() is called. 
80862306a36Sopenharmony_ci */ 80962306a36Sopenharmony_cistatic void __init xen_after_bootmem(void) 81062306a36Sopenharmony_ci{ 81162306a36Sopenharmony_ci static_branch_enable(&xen_struct_pages_ready); 81262306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 81362306a36Sopenharmony_ci SetPagePinned(virt_to_page(level3_user_vsyscall)); 81462306a36Sopenharmony_ci#endif 81562306a36Sopenharmony_ci xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); 81662306a36Sopenharmony_ci} 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_cistatic void xen_unpin_page(struct mm_struct *mm, struct page *page, 81962306a36Sopenharmony_ci enum pt_level level) 82062306a36Sopenharmony_ci{ 82162306a36Sopenharmony_ci unsigned pgfl = TestClearPagePinned(page); 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci if (pgfl) { 82462306a36Sopenharmony_ci void *pt = lowmem_page_address(page); 82562306a36Sopenharmony_ci unsigned long pfn = page_to_pfn(page); 82662306a36Sopenharmony_ci spinlock_t *ptl = NULL; 82762306a36Sopenharmony_ci struct multicall_space mcs; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci /* 83062306a36Sopenharmony_ci * Do the converse to pin_page. If we're using split 83162306a36Sopenharmony_ci * pte locks, we must be holding the lock for while 83262306a36Sopenharmony_ci * the pte page is unpinned but still RO to prevent 83362306a36Sopenharmony_ci * concurrent updates from seeing it in this 83462306a36Sopenharmony_ci * partially-pinned state. 
83562306a36Sopenharmony_ci */ 83662306a36Sopenharmony_ci if (level == PT_PTE) { 83762306a36Sopenharmony_ci ptl = xen_pte_lock(page, mm); 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci if (ptl) 84062306a36Sopenharmony_ci xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); 84162306a36Sopenharmony_ci } 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci mcs = __xen_mc_entry(0); 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, 84662306a36Sopenharmony_ci pfn_pte(pfn, PAGE_KERNEL), 84762306a36Sopenharmony_ci level == PT_PGD ? UVMF_TLB_FLUSH : 0); 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci if (ptl) { 85062306a36Sopenharmony_ci /* unlock when batch completed */ 85162306a36Sopenharmony_ci xen_mc_callback(xen_pte_unlock, ptl); 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci } 85462306a36Sopenharmony_ci} 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci/* Release a pagetables pages back as normal RW */ 85762306a36Sopenharmony_cistatic void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) 85862306a36Sopenharmony_ci{ 85962306a36Sopenharmony_ci pgd_t *user_pgd = xen_get_user_pgd(pgd); 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci trace_xen_mmu_pgd_unpin(mm, pgd); 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci xen_mc_batch(); 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci if (user_pgd) { 86862306a36Sopenharmony_ci xen_do_pin(MMUEXT_UNPIN_TABLE, 86962306a36Sopenharmony_ci PFN_DOWN(__pa(user_pgd))); 87062306a36Sopenharmony_ci xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci xen_mc_issue(0); 87662306a36Sopenharmony_ci} 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_cistatic void 
xen_pgd_unpin(struct mm_struct *mm) 87962306a36Sopenharmony_ci{ 88062306a36Sopenharmony_ci __xen_pgd_unpin(mm, mm->pgd); 88162306a36Sopenharmony_ci} 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci/* 88462306a36Sopenharmony_ci * On resume, undo any pinning done at save, so that the rest of the 88562306a36Sopenharmony_ci * kernel doesn't see any unexpected pinned pagetables. 88662306a36Sopenharmony_ci */ 88762306a36Sopenharmony_civoid xen_mm_unpin_all(void) 88862306a36Sopenharmony_ci{ 88962306a36Sopenharmony_ci struct page *page; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci spin_lock(&pgd_lock); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci list_for_each_entry(page, &pgd_list, lru) { 89462306a36Sopenharmony_ci if (PageSavePinned(page)) { 89562306a36Sopenharmony_ci BUG_ON(!PagePinned(page)); 89662306a36Sopenharmony_ci __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page)); 89762306a36Sopenharmony_ci ClearPageSavePinned(page); 89862306a36Sopenharmony_ci } 89962306a36Sopenharmony_ci } 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci spin_unlock(&pgd_lock); 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_cistatic void xen_enter_mmap(struct mm_struct *mm) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci spin_lock(&mm->page_table_lock); 90762306a36Sopenharmony_ci xen_pgd_pin(mm); 90862306a36Sopenharmony_ci spin_unlock(&mm->page_table_lock); 90962306a36Sopenharmony_ci} 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_cistatic void drop_mm_ref_this_cpu(void *info) 91262306a36Sopenharmony_ci{ 91362306a36Sopenharmony_ci struct mm_struct *mm = info; 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) 91662306a36Sopenharmony_ci leave_mm(smp_processor_id()); 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci /* 91962306a36Sopenharmony_ci * If this cpu still has a stale cr3 reference, then make sure 92062306a36Sopenharmony_ci * it has been flushed. 
92162306a36Sopenharmony_ci */ 92262306a36Sopenharmony_ci if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) 92362306a36Sopenharmony_ci xen_mc_flush(); 92462306a36Sopenharmony_ci} 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci#ifdef CONFIG_SMP 92762306a36Sopenharmony_ci/* 92862306a36Sopenharmony_ci * Another cpu may still have their %cr3 pointing at the pagetable, so 92962306a36Sopenharmony_ci * we need to repoint it somewhere else before we can unpin it. 93062306a36Sopenharmony_ci */ 93162306a36Sopenharmony_cistatic void xen_drop_mm_ref(struct mm_struct *mm) 93262306a36Sopenharmony_ci{ 93362306a36Sopenharmony_ci cpumask_var_t mask; 93462306a36Sopenharmony_ci unsigned cpu; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci drop_mm_ref_this_cpu(mm); 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci /* Get the "official" set of cpus referring to our pagetable. */ 93962306a36Sopenharmony_ci if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { 94062306a36Sopenharmony_ci for_each_online_cpu(cpu) { 94162306a36Sopenharmony_ci if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) 94262306a36Sopenharmony_ci continue; 94362306a36Sopenharmony_ci smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1); 94462306a36Sopenharmony_ci } 94562306a36Sopenharmony_ci return; 94662306a36Sopenharmony_ci } 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci /* 94962306a36Sopenharmony_ci * It's possible that a vcpu may have a stale reference to our 95062306a36Sopenharmony_ci * cr3, because its in lazy mode, and it hasn't yet flushed 95162306a36Sopenharmony_ci * its set of pending hypercalls yet. In this case, we can 95262306a36Sopenharmony_ci * look at its actual current cr3 value, and force it to flush 95362306a36Sopenharmony_ci * if needed. 
95462306a36Sopenharmony_ci */ 95562306a36Sopenharmony_ci cpumask_clear(mask); 95662306a36Sopenharmony_ci for_each_online_cpu(cpu) { 95762306a36Sopenharmony_ci if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) 95862306a36Sopenharmony_ci cpumask_set_cpu(cpu, mask); 95962306a36Sopenharmony_ci } 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1); 96262306a36Sopenharmony_ci free_cpumask_var(mask); 96362306a36Sopenharmony_ci} 96462306a36Sopenharmony_ci#else 96562306a36Sopenharmony_cistatic void xen_drop_mm_ref(struct mm_struct *mm) 96662306a36Sopenharmony_ci{ 96762306a36Sopenharmony_ci drop_mm_ref_this_cpu(mm); 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ci#endif 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci/* 97262306a36Sopenharmony_ci * While a process runs, Xen pins its pagetables, which means that the 97362306a36Sopenharmony_ci * hypervisor forces it to be read-only, and it controls all updates 97462306a36Sopenharmony_ci * to it. This means that all pagetable updates have to go via the 97562306a36Sopenharmony_ci * hypervisor, which is moderately expensive. 97662306a36Sopenharmony_ci * 97762306a36Sopenharmony_ci * Since we're pulling the pagetable down, we switch to use init_mm, 97862306a36Sopenharmony_ci * unpin old process pagetable and mark it all read-write, which 97962306a36Sopenharmony_ci * allows further operations on it to be simple memory accesses. 98062306a36Sopenharmony_ci * 98162306a36Sopenharmony_ci * The only subtle point is that another CPU may be still using the 98262306a36Sopenharmony_ci * pagetable because of lazy tlb flushing. This means we need need to 98362306a36Sopenharmony_ci * switch all CPUs off this pagetable before we can unpin it. 
98462306a36Sopenharmony_ci */ 98562306a36Sopenharmony_cistatic void xen_exit_mmap(struct mm_struct *mm) 98662306a36Sopenharmony_ci{ 98762306a36Sopenharmony_ci get_cpu(); /* make sure we don't move around */ 98862306a36Sopenharmony_ci xen_drop_mm_ref(mm); 98962306a36Sopenharmony_ci put_cpu(); 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci spin_lock(&mm->page_table_lock); 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci /* pgd may not be pinned in the error exit path of execve */ 99462306a36Sopenharmony_ci if (xen_page_pinned(mm->pgd)) 99562306a36Sopenharmony_ci xen_pgd_unpin(mm); 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci spin_unlock(&mm->page_table_lock); 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_cistatic void xen_post_allocator_init(void); 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_cistatic void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 100362306a36Sopenharmony_ci{ 100462306a36Sopenharmony_ci struct mmuext_op op; 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci op.cmd = cmd; 100762306a36Sopenharmony_ci op.arg1.mfn = pfn_to_mfn(pfn); 100862306a36Sopenharmony_ci if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) 100962306a36Sopenharmony_ci BUG(); 101062306a36Sopenharmony_ci} 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_cistatic void __init xen_cleanhighmap(unsigned long vaddr, 101362306a36Sopenharmony_ci unsigned long vaddr_end) 101462306a36Sopenharmony_ci{ 101562306a36Sopenharmony_ci unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; 101662306a36Sopenharmony_ci pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci /* NOTE: The loop is more greedy than the cleanup_highmap variant. 101962306a36Sopenharmony_ci * We include the PMD passed in on _both_ boundaries. 
*/ 102062306a36Sopenharmony_ci for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); 102162306a36Sopenharmony_ci pmd++, vaddr += PMD_SIZE) { 102262306a36Sopenharmony_ci if (pmd_none(*pmd)) 102362306a36Sopenharmony_ci continue; 102462306a36Sopenharmony_ci if (vaddr < (unsigned long) _text || vaddr > kernel_end) 102562306a36Sopenharmony_ci set_pmd(pmd, __pmd(0)); 102662306a36Sopenharmony_ci } 102762306a36Sopenharmony_ci /* In case we did something silly, we should crash in this function 102862306a36Sopenharmony_ci * instead of somewhere later and be confusing. */ 102962306a36Sopenharmony_ci xen_mc_flush(); 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci/* 103362306a36Sopenharmony_ci * Make a page range writeable and free it. 103462306a36Sopenharmony_ci */ 103562306a36Sopenharmony_cistatic void __init xen_free_ro_pages(unsigned long paddr, unsigned long size) 103662306a36Sopenharmony_ci{ 103762306a36Sopenharmony_ci void *vaddr = __va(paddr); 103862306a36Sopenharmony_ci void *vaddr_end = vaddr + size; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) 104162306a36Sopenharmony_ci make_lowmem_page_readwrite(vaddr); 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci memblock_phys_free(paddr, size); 104462306a36Sopenharmony_ci} 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_cistatic void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin) 104762306a36Sopenharmony_ci{ 104862306a36Sopenharmony_ci unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci if (unpin) 105162306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa)); 105262306a36Sopenharmony_ci ClearPagePinned(virt_to_page(__va(pa))); 105362306a36Sopenharmony_ci xen_free_ro_pages(pa, PAGE_SIZE); 105462306a36Sopenharmony_ci} 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_cistatic void __init xen_cleanmfnmap_pmd(pmd_t 
*pmd, bool unpin) 105762306a36Sopenharmony_ci{ 105862306a36Sopenharmony_ci unsigned long pa; 105962306a36Sopenharmony_ci pte_t *pte_tbl; 106062306a36Sopenharmony_ci int i; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci if (pmd_large(*pmd)) { 106362306a36Sopenharmony_ci pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK; 106462306a36Sopenharmony_ci xen_free_ro_pages(pa, PMD_SIZE); 106562306a36Sopenharmony_ci return; 106662306a36Sopenharmony_ci } 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci pte_tbl = pte_offset_kernel(pmd, 0); 106962306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PTE; i++) { 107062306a36Sopenharmony_ci if (pte_none(pte_tbl[i])) 107162306a36Sopenharmony_ci continue; 107262306a36Sopenharmony_ci pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT; 107362306a36Sopenharmony_ci xen_free_ro_pages(pa, PAGE_SIZE); 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci set_pmd(pmd, __pmd(0)); 107662306a36Sopenharmony_ci xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin); 107762306a36Sopenharmony_ci} 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_cistatic void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) 108062306a36Sopenharmony_ci{ 108162306a36Sopenharmony_ci unsigned long pa; 108262306a36Sopenharmony_ci pmd_t *pmd_tbl; 108362306a36Sopenharmony_ci int i; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci if (pud_large(*pud)) { 108662306a36Sopenharmony_ci pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; 108762306a36Sopenharmony_ci xen_free_ro_pages(pa, PUD_SIZE); 108862306a36Sopenharmony_ci return; 108962306a36Sopenharmony_ci } 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_ci pmd_tbl = pmd_offset(pud, 0); 109262306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PMD; i++) { 109362306a36Sopenharmony_ci if (pmd_none(pmd_tbl[i])) 109462306a36Sopenharmony_ci continue; 109562306a36Sopenharmony_ci xen_cleanmfnmap_pmd(pmd_tbl + i, unpin); 109662306a36Sopenharmony_ci } 109762306a36Sopenharmony_ci set_pud(pud, __pud(0)); 109862306a36Sopenharmony_ci 
xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin); 109962306a36Sopenharmony_ci} 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_cistatic void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin) 110262306a36Sopenharmony_ci{ 110362306a36Sopenharmony_ci unsigned long pa; 110462306a36Sopenharmony_ci pud_t *pud_tbl; 110562306a36Sopenharmony_ci int i; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci if (p4d_large(*p4d)) { 110862306a36Sopenharmony_ci pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK; 110962306a36Sopenharmony_ci xen_free_ro_pages(pa, P4D_SIZE); 111062306a36Sopenharmony_ci return; 111162306a36Sopenharmony_ci } 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci pud_tbl = pud_offset(p4d, 0); 111462306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PUD; i++) { 111562306a36Sopenharmony_ci if (pud_none(pud_tbl[i])) 111662306a36Sopenharmony_ci continue; 111762306a36Sopenharmony_ci xen_cleanmfnmap_pud(pud_tbl + i, unpin); 111862306a36Sopenharmony_ci } 111962306a36Sopenharmony_ci set_p4d(p4d, __p4d(0)); 112062306a36Sopenharmony_ci xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin); 112162306a36Sopenharmony_ci} 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci/* 112462306a36Sopenharmony_ci * Since it is well isolated we can (and since it is perhaps large we should) 112562306a36Sopenharmony_ci * also free the page tables mapping the initial P->M table. 
112662306a36Sopenharmony_ci */ 112762306a36Sopenharmony_cistatic void __init xen_cleanmfnmap(unsigned long vaddr) 112862306a36Sopenharmony_ci{ 112962306a36Sopenharmony_ci pgd_t *pgd; 113062306a36Sopenharmony_ci p4d_t *p4d; 113162306a36Sopenharmony_ci bool unpin; 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci unpin = (vaddr == 2 * PGDIR_SIZE); 113462306a36Sopenharmony_ci vaddr &= PMD_MASK; 113562306a36Sopenharmony_ci pgd = pgd_offset_k(vaddr); 113662306a36Sopenharmony_ci p4d = p4d_offset(pgd, 0); 113762306a36Sopenharmony_ci if (!p4d_none(*p4d)) 113862306a36Sopenharmony_ci xen_cleanmfnmap_p4d(p4d, unpin); 113962306a36Sopenharmony_ci} 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_cistatic void __init xen_pagetable_p2m_free(void) 114262306a36Sopenharmony_ci{ 114362306a36Sopenharmony_ci unsigned long size; 114462306a36Sopenharmony_ci unsigned long addr; 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci /* No memory or already called. */ 114962306a36Sopenharmony_ci if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) 115062306a36Sopenharmony_ci return; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci /* using __ka address and sticking INVALID_P2M_ENTRY! */ 115362306a36Sopenharmony_ci memset((void *)xen_start_info->mfn_list, 0xff, size); 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci addr = xen_start_info->mfn_list; 115662306a36Sopenharmony_ci /* 115762306a36Sopenharmony_ci * We could be in __ka space. 115862306a36Sopenharmony_ci * We roundup to the PMD, which means that if anybody at this stage is 115962306a36Sopenharmony_ci * using the __ka address of xen_start_info or 116062306a36Sopenharmony_ci * xen_start_info->shared_info they are in going to crash. Fortunately 116162306a36Sopenharmony_ci * we have already revectored in xen_setup_kernel_pagetable. 
116262306a36Sopenharmony_ci */ 116362306a36Sopenharmony_ci size = roundup(size, PMD_SIZE); 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci if (addr >= __START_KERNEL_map) { 116662306a36Sopenharmony_ci xen_cleanhighmap(addr, addr + size); 116762306a36Sopenharmony_ci size = PAGE_ALIGN(xen_start_info->nr_pages * 116862306a36Sopenharmony_ci sizeof(unsigned long)); 116962306a36Sopenharmony_ci memblock_free((void *)addr, size); 117062306a36Sopenharmony_ci } else { 117162306a36Sopenharmony_ci xen_cleanmfnmap(addr); 117262306a36Sopenharmony_ci } 117362306a36Sopenharmony_ci} 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_cistatic void __init xen_pagetable_cleanhighmap(void) 117662306a36Sopenharmony_ci{ 117762306a36Sopenharmony_ci unsigned long size; 117862306a36Sopenharmony_ci unsigned long addr; 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci /* At this stage, cleanup_highmap has already cleaned __ka space 118162306a36Sopenharmony_ci * from _brk_limit way up to the max_pfn_mapped (which is the end of 118262306a36Sopenharmony_ci * the ramdisk). We continue on, erasing PMD entries that point to page 118362306a36Sopenharmony_ci * tables - do note that they are accessible at this stage via __va. 118462306a36Sopenharmony_ci * As Xen is aligning the memory end to a 4MB boundary, for good 118562306a36Sopenharmony_ci * measure we also round up to PMD_SIZE * 2 - which means that if 118662306a36Sopenharmony_ci * anybody is using __ka address to the initial boot-stack - and try 118762306a36Sopenharmony_ci * to use it - they are going to crash. The xen_start_info has been 118862306a36Sopenharmony_ci * taken care of already in xen_setup_kernel_pagetable. 
*/ 118962306a36Sopenharmony_ci addr = xen_start_info->pt_base; 119062306a36Sopenharmony_ci size = xen_start_info->nr_pt_frames * PAGE_SIZE; 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); 119362306a36Sopenharmony_ci xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); 119462306a36Sopenharmony_ci} 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_cistatic void __init xen_pagetable_p2m_setup(void) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci xen_vmalloc_p2m_tree(); 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci xen_pagetable_p2m_free(); 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci xen_pagetable_cleanhighmap(); 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci /* And revector! Bye bye old array */ 120562306a36Sopenharmony_ci xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; 120662306a36Sopenharmony_ci} 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_cistatic void __init xen_pagetable_init(void) 120962306a36Sopenharmony_ci{ 121062306a36Sopenharmony_ci /* 121162306a36Sopenharmony_ci * The majority of further PTE writes is to pagetables already 121262306a36Sopenharmony_ci * announced as such to Xen. Hence it is more efficient to use 121362306a36Sopenharmony_ci * hypercalls for these updates. 
121462306a36Sopenharmony_ci */ 121562306a36Sopenharmony_ci pv_ops.mmu.set_pte = __xen_set_pte; 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci paging_init(); 121862306a36Sopenharmony_ci xen_post_allocator_init(); 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci xen_pagetable_p2m_setup(); 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci /* Allocate and initialize top and mid mfn levels for p2m structure */ 122362306a36Sopenharmony_ci xen_build_mfn_list_list(); 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci /* Remap memory freed due to conflicts with E820 map */ 122662306a36Sopenharmony_ci xen_remap_memory(); 122762306a36Sopenharmony_ci xen_setup_mfn_list_list(); 122862306a36Sopenharmony_ci} 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_cistatic noinstr void xen_write_cr2(unsigned long cr2) 123162306a36Sopenharmony_ci{ 123262306a36Sopenharmony_ci this_cpu_read(xen_vcpu)->arch.cr2 = cr2; 123362306a36Sopenharmony_ci} 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_cistatic noinline void xen_flush_tlb(void) 123662306a36Sopenharmony_ci{ 123762306a36Sopenharmony_ci struct mmuext_op *op; 123862306a36Sopenharmony_ci struct multicall_space mcs; 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci preempt_disable(); 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci mcs = xen_mc_entry(sizeof(*op)); 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci op = mcs.args; 124562306a36Sopenharmony_ci op->cmd = MMUEXT_TLB_FLUSH_LOCAL; 124662306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci preempt_enable(); 125162306a36Sopenharmony_ci} 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_cistatic void xen_flush_tlb_one_user(unsigned long addr) 125462306a36Sopenharmony_ci{ 125562306a36Sopenharmony_ci struct mmuext_op *op; 125662306a36Sopenharmony_ci struct multicall_space 
mcs; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci trace_xen_mmu_flush_tlb_one_user(addr); 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci preempt_disable(); 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci mcs = xen_mc_entry(sizeof(*op)); 126362306a36Sopenharmony_ci op = mcs.args; 126462306a36Sopenharmony_ci op->cmd = MMUEXT_INVLPG_LOCAL; 126562306a36Sopenharmony_ci op->arg1.linear_addr = addr & PAGE_MASK; 126662306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci preempt_enable(); 127162306a36Sopenharmony_ci} 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_cistatic void xen_flush_tlb_multi(const struct cpumask *cpus, 127462306a36Sopenharmony_ci const struct flush_tlb_info *info) 127562306a36Sopenharmony_ci{ 127662306a36Sopenharmony_ci struct { 127762306a36Sopenharmony_ci struct mmuext_op op; 127862306a36Sopenharmony_ci DECLARE_BITMAP(mask, NR_CPUS); 127962306a36Sopenharmony_ci } *args; 128062306a36Sopenharmony_ci struct multicall_space mcs; 128162306a36Sopenharmony_ci const size_t mc_entry_size = sizeof(args->op) + 128262306a36Sopenharmony_ci sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus()); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end); 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci if (cpumask_empty(cpus)) 128762306a36Sopenharmony_ci return; /* nothing to do */ 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci mcs = xen_mc_entry(mc_entry_size); 129062306a36Sopenharmony_ci args = mcs.args; 129162306a36Sopenharmony_ci args->op.arg2.vcpumask = to_cpumask(args->mask); 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci /* Remove any offline CPUs */ 129462306a36Sopenharmony_ci cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); 129562306a36Sopenharmony_ci 
129662306a36Sopenharmony_ci args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 129762306a36Sopenharmony_ci if (info->end != TLB_FLUSH_ALL && 129862306a36Sopenharmony_ci (info->end - info->start) <= PAGE_SIZE) { 129962306a36Sopenharmony_ci args->op.cmd = MMUEXT_INVLPG_MULTI; 130062306a36Sopenharmony_ci args->op.arg1.linear_addr = info->start; 130162306a36Sopenharmony_ci } 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 130662306a36Sopenharmony_ci} 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_cistatic unsigned long xen_read_cr3(void) 130962306a36Sopenharmony_ci{ 131062306a36Sopenharmony_ci return this_cpu_read(xen_cr3); 131162306a36Sopenharmony_ci} 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_cistatic void set_current_cr3(void *v) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci this_cpu_write(xen_current_cr3, (unsigned long)v); 131662306a36Sopenharmony_ci} 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_cistatic void __xen_write_cr3(bool kernel, unsigned long cr3) 131962306a36Sopenharmony_ci{ 132062306a36Sopenharmony_ci struct mmuext_op op; 132162306a36Sopenharmony_ci unsigned long mfn; 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci trace_xen_mmu_write_cr3(kernel, cr3); 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci if (cr3) 132662306a36Sopenharmony_ci mfn = pfn_to_mfn(PFN_DOWN(cr3)); 132762306a36Sopenharmony_ci else 132862306a36Sopenharmony_ci mfn = 0; 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci WARN_ON(mfn == 0 && kernel); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci op.cmd = kernel ? 
MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; 133362306a36Sopenharmony_ci op.arg1.mfn = mfn; 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci xen_extend_mmuext_op(&op); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci if (kernel) { 133862306a36Sopenharmony_ci this_cpu_write(xen_cr3, cr3); 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci /* Update xen_current_cr3 once the batch has actually 134162306a36Sopenharmony_ci been submitted. */ 134262306a36Sopenharmony_ci xen_mc_callback(set_current_cr3, (void *)cr3); 134362306a36Sopenharmony_ci } 134462306a36Sopenharmony_ci} 134562306a36Sopenharmony_cistatic void xen_write_cr3(unsigned long cr3) 134662306a36Sopenharmony_ci{ 134762306a36Sopenharmony_ci pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci BUG_ON(preemptible()); 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci xen_mc_batch(); /* disables interrupts */ 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci /* Update while interrupts are disabled, so its atomic with 135462306a36Sopenharmony_ci respect to ipis */ 135562306a36Sopenharmony_ci this_cpu_write(xen_cr3, cr3); 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_ci __xen_write_cr3(true, cr3); 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci if (user_pgd) 136062306a36Sopenharmony_ci __xen_write_cr3(false, __pa(user_pgd)); 136162306a36Sopenharmony_ci else 136262306a36Sopenharmony_ci __xen_write_cr3(false, 0); 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ 136562306a36Sopenharmony_ci} 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci/* 136862306a36Sopenharmony_ci * At the start of the day - when Xen launches a guest, it has already 136962306a36Sopenharmony_ci * built pagetables for the guest. 
 * We diligently look over them
 * in xen_setup_kernel_pagetable and graft them as appropriate into
 * init_top_pgt and its friends.  Then when we are happy we load
 * the new init_top_pgt - and continue on.
 *
 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
 * up the rest of the pagetables. When it has completed it loads the cr3.
 * N.B. that baremetal would start at 'start_kernel' (and the early
 * #PF handler would create bootstrap pagetables) - so we are running
 * with the same assumptions as to what to do when write_cr3 is executed
 * at this point.
 *
 * Since there are no user-page tables at all, we have two variants
 * of xen_write_cr3 - the early bootup (this one), and the late one
 * (xen_write_cr3).  The reason we have to do that is that in 64-bit
 * the Linux kernel and user-space are both in ring 3 while the
 * hypervisor is in ring 0.
138662306a36Sopenharmony_ci */ 138762306a36Sopenharmony_cistatic void __init xen_write_cr3_init(unsigned long cr3) 138862306a36Sopenharmony_ci{ 138962306a36Sopenharmony_ci BUG_ON(preemptible()); 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci xen_mc_batch(); /* disables interrupts */ 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci /* Update while interrupts are disabled, so its atomic with 139462306a36Sopenharmony_ci respect to ipis */ 139562306a36Sopenharmony_ci this_cpu_write(xen_cr3, cr3); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci __xen_write_cr3(true, cr3); 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ 140062306a36Sopenharmony_ci} 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_cistatic int xen_pgd_alloc(struct mm_struct *mm) 140362306a36Sopenharmony_ci{ 140462306a36Sopenharmony_ci pgd_t *pgd = mm->pgd; 140562306a36Sopenharmony_ci struct page *page = virt_to_page(pgd); 140662306a36Sopenharmony_ci pgd_t *user_pgd; 140762306a36Sopenharmony_ci int ret = -ENOMEM; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci BUG_ON(PagePinned(virt_to_page(pgd))); 141062306a36Sopenharmony_ci BUG_ON(page->private != 0); 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 141362306a36Sopenharmony_ci page->private = (unsigned long)user_pgd; 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci if (user_pgd != NULL) { 141662306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 141762306a36Sopenharmony_ci user_pgd[pgd_index(VSYSCALL_ADDR)] = 141862306a36Sopenharmony_ci __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); 141962306a36Sopenharmony_ci#endif 142062306a36Sopenharmony_ci ret = 0; 142162306a36Sopenharmony_ci } 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci return ret; 
142662306a36Sopenharmony_ci} 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_cistatic void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) 142962306a36Sopenharmony_ci{ 143062306a36Sopenharmony_ci pgd_t *user_pgd = xen_get_user_pgd(pgd); 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci if (user_pgd) 143362306a36Sopenharmony_ci free_page((unsigned long)user_pgd); 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci/* 143762306a36Sopenharmony_ci * Init-time set_pte while constructing initial pagetables, which 143862306a36Sopenharmony_ci * doesn't allow RO page table pages to be remapped RW. 143962306a36Sopenharmony_ci * 144062306a36Sopenharmony_ci * If there is no MFN for this PFN then this page is initially 144162306a36Sopenharmony_ci * ballooned out so clear the PTE (as in decrease_reservation() in 144262306a36Sopenharmony_ci * drivers/xen/balloon.c). 144362306a36Sopenharmony_ci * 144462306a36Sopenharmony_ci * Many of these PTE updates are done on unpinned and writable pages 144562306a36Sopenharmony_ci * and doing a hypercall for these is unnecessary and expensive. At 144662306a36Sopenharmony_ci * this point it is rarely possible to tell if a page is pinned, so 144762306a36Sopenharmony_ci * mostly write the PTE directly and rely on Xen trapping and 144862306a36Sopenharmony_ci * emulating any updates as necessary. 
144962306a36Sopenharmony_ci */ 145062306a36Sopenharmony_cistatic void __init xen_set_pte_init(pte_t *ptep, pte_t pte) 145162306a36Sopenharmony_ci{ 145262306a36Sopenharmony_ci if (unlikely(is_early_ioremap_ptep(ptep))) 145362306a36Sopenharmony_ci __xen_set_pte(ptep, pte); 145462306a36Sopenharmony_ci else 145562306a36Sopenharmony_ci native_set_pte(ptep, pte); 145662306a36Sopenharmony_ci} 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_ci__visible pte_t xen_make_pte_init(pteval_t pte) 145962306a36Sopenharmony_ci{ 146062306a36Sopenharmony_ci unsigned long pfn; 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci /* 146362306a36Sopenharmony_ci * Pages belonging to the initial p2m list mapped outside the default 146462306a36Sopenharmony_ci * address range must be mapped read-only. This region contains the 146562306a36Sopenharmony_ci * page tables for mapping the p2m list, too, and page tables MUST be 146662306a36Sopenharmony_ci * mapped read-only. 146762306a36Sopenharmony_ci */ 146862306a36Sopenharmony_ci pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; 146962306a36Sopenharmony_ci if (xen_start_info->mfn_list < __START_KERNEL_map && 147062306a36Sopenharmony_ci pfn >= xen_start_info->first_p2m_pfn && 147162306a36Sopenharmony_ci pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) 147262306a36Sopenharmony_ci pte &= ~_PAGE_RW; 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci pte = pte_pfn_to_mfn(pte); 147562306a36Sopenharmony_ci return native_make_pte(pte); 147662306a36Sopenharmony_ci} 147762306a36Sopenharmony_ciPV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci/* Early in boot, while setting up the initial pagetable, assume 148062306a36Sopenharmony_ci everything is pinned. 
*/ 148162306a36Sopenharmony_cistatic void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 148262306a36Sopenharmony_ci{ 148362306a36Sopenharmony_ci#ifdef CONFIG_FLATMEM 148462306a36Sopenharmony_ci BUG_ON(mem_map); /* should only be used early */ 148562306a36Sopenharmony_ci#endif 148662306a36Sopenharmony_ci make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 148762306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); 148862306a36Sopenharmony_ci} 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci/* Used for pmd and pud */ 149162306a36Sopenharmony_cistatic void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) 149262306a36Sopenharmony_ci{ 149362306a36Sopenharmony_ci#ifdef CONFIG_FLATMEM 149462306a36Sopenharmony_ci BUG_ON(mem_map); /* should only be used early */ 149562306a36Sopenharmony_ci#endif 149662306a36Sopenharmony_ci make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 149762306a36Sopenharmony_ci} 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci/* Early release_pte assumes that all pts are pinned, since there's 150062306a36Sopenharmony_ci only init_mm and anything attached to that is pinned. 
*/ 150162306a36Sopenharmony_cistatic void __init xen_release_pte_init(unsigned long pfn) 150262306a36Sopenharmony_ci{ 150362306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 150462306a36Sopenharmony_ci make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 150562306a36Sopenharmony_ci} 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_cistatic void __init xen_release_pmd_init(unsigned long pfn) 150862306a36Sopenharmony_ci{ 150962306a36Sopenharmony_ci make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 151062306a36Sopenharmony_ci} 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_cistatic inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 151362306a36Sopenharmony_ci{ 151462306a36Sopenharmony_ci struct multicall_space mcs; 151562306a36Sopenharmony_ci struct mmuext_op *op; 151662306a36Sopenharmony_ci 151762306a36Sopenharmony_ci mcs = __xen_mc_entry(sizeof(*op)); 151862306a36Sopenharmony_ci op = mcs.args; 151962306a36Sopenharmony_ci op->cmd = cmd; 152062306a36Sopenharmony_ci op->arg1.mfn = pfn_to_mfn(pfn); 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); 152362306a36Sopenharmony_ci} 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_cistatic inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) 152662306a36Sopenharmony_ci{ 152762306a36Sopenharmony_ci struct multicall_space mcs; 152862306a36Sopenharmony_ci unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT); 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci mcs = __xen_mc_entry(0); 153162306a36Sopenharmony_ci MULTI_update_va_mapping(mcs.mc, (unsigned long)addr, 153262306a36Sopenharmony_ci pfn_pte(pfn, prot), 0); 153362306a36Sopenharmony_ci} 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci/* This needs to make sure the new pte page is pinned iff its being 153662306a36Sopenharmony_ci attached to a pinned pagetable. 
*/ 153762306a36Sopenharmony_cistatic inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, 153862306a36Sopenharmony_ci unsigned level) 153962306a36Sopenharmony_ci{ 154062306a36Sopenharmony_ci bool pinned = xen_page_pinned(mm->pgd); 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci if (pinned) { 154562306a36Sopenharmony_ci struct page *page = pfn_to_page(pfn); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci pinned = false; 154862306a36Sopenharmony_ci if (static_branch_likely(&xen_struct_pages_ready)) { 154962306a36Sopenharmony_ci pinned = PagePinned(page); 155062306a36Sopenharmony_ci SetPagePinned(page); 155162306a36Sopenharmony_ci } 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci xen_mc_batch(); 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci __set_pfn_prot(pfn, PAGE_KERNEL_RO); 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) 155862306a36Sopenharmony_ci __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 156162306a36Sopenharmony_ci } 156262306a36Sopenharmony_ci} 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_cistatic void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) 156562306a36Sopenharmony_ci{ 156662306a36Sopenharmony_ci xen_alloc_ptpage(mm, pfn, PT_PTE); 156762306a36Sopenharmony_ci} 156862306a36Sopenharmony_ci 156962306a36Sopenharmony_cistatic void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) 157062306a36Sopenharmony_ci{ 157162306a36Sopenharmony_ci xen_alloc_ptpage(mm, pfn, PT_PMD); 157262306a36Sopenharmony_ci} 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci/* This should never happen until we're OK to use struct page */ 157562306a36Sopenharmony_cistatic inline void xen_release_ptpage(unsigned long pfn, unsigned level) 
157662306a36Sopenharmony_ci{ 157762306a36Sopenharmony_ci struct page *page = pfn_to_page(pfn); 157862306a36Sopenharmony_ci bool pinned = PagePinned(page); 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci trace_xen_mmu_release_ptpage(pfn, level, pinned); 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci if (pinned) { 158362306a36Sopenharmony_ci xen_mc_batch(); 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) 158662306a36Sopenharmony_ci __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci __set_pfn_prot(pfn, PAGE_KERNEL); 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 159162306a36Sopenharmony_ci 159262306a36Sopenharmony_ci ClearPagePinned(page); 159362306a36Sopenharmony_ci } 159462306a36Sopenharmony_ci} 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_cistatic void xen_release_pte(unsigned long pfn) 159762306a36Sopenharmony_ci{ 159862306a36Sopenharmony_ci xen_release_ptpage(pfn, PT_PTE); 159962306a36Sopenharmony_ci} 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_cistatic void xen_release_pmd(unsigned long pfn) 160262306a36Sopenharmony_ci{ 160362306a36Sopenharmony_ci xen_release_ptpage(pfn, PT_PMD); 160462306a36Sopenharmony_ci} 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_cistatic void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) 160762306a36Sopenharmony_ci{ 160862306a36Sopenharmony_ci xen_alloc_ptpage(mm, pfn, PT_PUD); 160962306a36Sopenharmony_ci} 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_cistatic void xen_release_pud(unsigned long pfn) 161262306a36Sopenharmony_ci{ 161362306a36Sopenharmony_ci xen_release_ptpage(pfn, PT_PUD); 161462306a36Sopenharmony_ci} 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci/* 161762306a36Sopenharmony_ci * Like __va(), but returns address in the kernel mapping (which is 161862306a36Sopenharmony_ci * all we have until the physical memory mapping 
has been set up. 161962306a36Sopenharmony_ci */ 162062306a36Sopenharmony_cistatic void * __init __ka(phys_addr_t paddr) 162162306a36Sopenharmony_ci{ 162262306a36Sopenharmony_ci return (void *)(paddr + __START_KERNEL_map); 162362306a36Sopenharmony_ci} 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci/* Convert a machine address to physical address */ 162662306a36Sopenharmony_cistatic unsigned long __init m2p(phys_addr_t maddr) 162762306a36Sopenharmony_ci{ 162862306a36Sopenharmony_ci phys_addr_t paddr; 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci maddr &= XEN_PTE_MFN_MASK; 163162306a36Sopenharmony_ci paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci return paddr; 163462306a36Sopenharmony_ci} 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci/* Convert a machine address to kernel virtual */ 163762306a36Sopenharmony_cistatic void * __init m2v(phys_addr_t maddr) 163862306a36Sopenharmony_ci{ 163962306a36Sopenharmony_ci return __ka(m2p(maddr)); 164062306a36Sopenharmony_ci} 164162306a36Sopenharmony_ci 164262306a36Sopenharmony_ci/* Set the page permissions on an identity-mapped pages */ 164362306a36Sopenharmony_cistatic void __init set_page_prot_flags(void *addr, pgprot_t prot, 164462306a36Sopenharmony_ci unsigned long flags) 164562306a36Sopenharmony_ci{ 164662306a36Sopenharmony_ci unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 164762306a36Sopenharmony_ci pte_t pte = pfn_pte(pfn, prot); 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) 165062306a36Sopenharmony_ci BUG(); 165162306a36Sopenharmony_ci} 165262306a36Sopenharmony_cistatic void __init set_page_prot(void *addr, pgprot_t prot) 165362306a36Sopenharmony_ci{ 165462306a36Sopenharmony_ci return set_page_prot_flags(addr, prot, UVMF_NONE); 165562306a36Sopenharmony_ci} 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_civoid __init xen_setup_machphys_mapping(void) 
165862306a36Sopenharmony_ci{ 165962306a36Sopenharmony_ci struct xen_machphys_mapping mapping; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { 166262306a36Sopenharmony_ci machine_to_phys_mapping = (unsigned long *)mapping.v_start; 166362306a36Sopenharmony_ci machine_to_phys_nr = mapping.max_mfn + 1; 166462306a36Sopenharmony_ci } else { 166562306a36Sopenharmony_ci machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci} 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_cistatic void __init convert_pfn_mfn(void *v) 167062306a36Sopenharmony_ci{ 167162306a36Sopenharmony_ci pte_t *pte = v; 167262306a36Sopenharmony_ci int i; 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci /* All levels are converted the same way, so just treat them 167562306a36Sopenharmony_ci as ptes. */ 167662306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PTE; i++) 167762306a36Sopenharmony_ci pte[i] = xen_make_pte(pte[i].pte); 167862306a36Sopenharmony_ci} 167962306a36Sopenharmony_cistatic void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, 168062306a36Sopenharmony_ci unsigned long addr) 168162306a36Sopenharmony_ci{ 168262306a36Sopenharmony_ci if (*pt_base == PFN_DOWN(__pa(addr))) { 168362306a36Sopenharmony_ci set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); 168462306a36Sopenharmony_ci clear_page((void *)addr); 168562306a36Sopenharmony_ci (*pt_base)++; 168662306a36Sopenharmony_ci } 168762306a36Sopenharmony_ci if (*pt_end == PFN_DOWN(__pa(addr))) { 168862306a36Sopenharmony_ci set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); 168962306a36Sopenharmony_ci clear_page((void *)addr); 169062306a36Sopenharmony_ci (*pt_end)--; 169162306a36Sopenharmony_ci } 169262306a36Sopenharmony_ci} 169362306a36Sopenharmony_ci/* 169462306a36Sopenharmony_ci * Set up the initial kernel pagetable. 
169562306a36Sopenharmony_ci * 169662306a36Sopenharmony_ci * We can construct this by grafting the Xen provided pagetable into 169762306a36Sopenharmony_ci * head_64.S's preconstructed pagetables. We copy the Xen L2's into 169862306a36Sopenharmony_ci * level2_ident_pgt, and level2_kernel_pgt. This means that only the 169962306a36Sopenharmony_ci * kernel has a physical mapping to start with - but that's enough to 170062306a36Sopenharmony_ci * get __va working. We need to fill in the rest of the physical 170162306a36Sopenharmony_ci * mapping once some sort of allocator has been set up. 170262306a36Sopenharmony_ci */ 170362306a36Sopenharmony_civoid __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 170462306a36Sopenharmony_ci{ 170562306a36Sopenharmony_ci pud_t *l3; 170662306a36Sopenharmony_ci pmd_t *l2; 170762306a36Sopenharmony_ci unsigned long addr[3]; 170862306a36Sopenharmony_ci unsigned long pt_base, pt_end; 170962306a36Sopenharmony_ci unsigned i; 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci /* max_pfn_mapped is the last pfn mapped in the initial memory 171262306a36Sopenharmony_ci * mappings. Considering that on Xen after the kernel mappings we 171362306a36Sopenharmony_ci * have the mappings of some pages that don't exist in pfn space, we 171462306a36Sopenharmony_ci * set max_pfn_mapped to the last real pfn mapped. 
*/ 171562306a36Sopenharmony_ci if (xen_start_info->mfn_list < __START_KERNEL_map) 171662306a36Sopenharmony_ci max_pfn_mapped = xen_start_info->first_p2m_pfn; 171762306a36Sopenharmony_ci else 171862306a36Sopenharmony_ci max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); 172162306a36Sopenharmony_ci pt_end = pt_base + xen_start_info->nr_pt_frames; 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci /* Zap identity mapping */ 172462306a36Sopenharmony_ci init_top_pgt[0] = __pgd(0); 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci /* Pre-constructed entries are in pfn, so convert to mfn */ 172762306a36Sopenharmony_ci /* L4[273] -> level3_ident_pgt */ 172862306a36Sopenharmony_ci /* L4[511] -> level3_kernel_pgt */ 172962306a36Sopenharmony_ci convert_pfn_mfn(init_top_pgt); 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci /* L3_i[0] -> level2_ident_pgt */ 173262306a36Sopenharmony_ci convert_pfn_mfn(level3_ident_pgt); 173362306a36Sopenharmony_ci /* L3_k[510] -> level2_kernel_pgt */ 173462306a36Sopenharmony_ci /* L3_k[511] -> level2_fixmap_pgt */ 173562306a36Sopenharmony_ci convert_pfn_mfn(level3_kernel_pgt); 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_ci /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */ 173862306a36Sopenharmony_ci convert_pfn_mfn(level2_fixmap_pgt); 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_ci /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 174162306a36Sopenharmony_ci l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 174262306a36Sopenharmony_ci l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci addr[0] = (unsigned long)pgd; 174562306a36Sopenharmony_ci addr[1] = (unsigned long)l3; 174662306a36Sopenharmony_ci addr[2] = (unsigned long)l2; 174762306a36Sopenharmony_ci /* Graft it onto L4[273][0]. 
Note that we creating an aliasing problem: 174862306a36Sopenharmony_ci * Both L4[273][0] and L4[511][510] have entries that point to the same 174962306a36Sopenharmony_ci * L2 (PMD) tables. Meaning that if you modify it in __va space 175062306a36Sopenharmony_ci * it will be also modified in the __ka space! (But if you just 175162306a36Sopenharmony_ci * modify the PMD table to point to other PTE's or none, then you 175262306a36Sopenharmony_ci * are OK - which is what cleanup_highmap does) */ 175362306a36Sopenharmony_ci copy_page(level2_ident_pgt, l2); 175462306a36Sopenharmony_ci /* Graft it onto L4[511][510] */ 175562306a36Sopenharmony_ci copy_page(level2_kernel_pgt, l2); 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci /* 175862306a36Sopenharmony_ci * Zap execute permission from the ident map. Due to the sharing of 175962306a36Sopenharmony_ci * L1 entries we need to do this in the L2. 176062306a36Sopenharmony_ci */ 176162306a36Sopenharmony_ci if (__supported_pte_mask & _PAGE_NX) { 176262306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PMD; ++i) { 176362306a36Sopenharmony_ci if (pmd_none(level2_ident_pgt[i])) 176462306a36Sopenharmony_ci continue; 176562306a36Sopenharmony_ci level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); 176662306a36Sopenharmony_ci } 176762306a36Sopenharmony_ci } 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci /* Copy the initial P->M table mappings if necessary. 
*/ 177062306a36Sopenharmony_ci i = pgd_index(xen_start_info->mfn_list); 177162306a36Sopenharmony_ci if (i && i < pgd_index(__START_KERNEL_map)) 177262306a36Sopenharmony_ci init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; 177362306a36Sopenharmony_ci 177462306a36Sopenharmony_ci /* Make pagetable pieces RO */ 177562306a36Sopenharmony_ci set_page_prot(init_top_pgt, PAGE_KERNEL_RO); 177662306a36Sopenharmony_ci set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 177762306a36Sopenharmony_ci set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 177862306a36Sopenharmony_ci set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 177962306a36Sopenharmony_ci set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 178062306a36Sopenharmony_ci set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 178162306a36Sopenharmony_ci 178262306a36Sopenharmony_ci for (i = 0; i < FIXMAP_PMD_NUM; i++) { 178362306a36Sopenharmony_ci set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE, 178462306a36Sopenharmony_ci PAGE_KERNEL_RO); 178562306a36Sopenharmony_ci } 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_ci /* Pin down new L4 */ 178862306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, 178962306a36Sopenharmony_ci PFN_DOWN(__pa_symbol(init_top_pgt))); 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci /* Unpin Xen-provided one */ 179262306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 179562306a36Sopenharmony_ci /* Pin user vsyscall L3 */ 179662306a36Sopenharmony_ci set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 179762306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, 179862306a36Sopenharmony_ci PFN_DOWN(__pa_symbol(level3_user_vsyscall))); 179962306a36Sopenharmony_ci#endif 180062306a36Sopenharmony_ci 180162306a36Sopenharmony_ci /* 180262306a36Sopenharmony_ci * At this stage there can be no user pgd, and no page structure to 180362306a36Sopenharmony_ci * 
attach it to, so make sure we just set kernel pgd. 180462306a36Sopenharmony_ci */ 180562306a36Sopenharmony_ci xen_mc_batch(); 180662306a36Sopenharmony_ci __xen_write_cr3(true, __pa(init_top_pgt)); 180762306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci /* We can't that easily rip out L3 and L2, as the Xen pagetables are 181062306a36Sopenharmony_ci * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for 181162306a36Sopenharmony_ci * the initial domain. For guests using the toolstack, they are in: 181262306a36Sopenharmony_ci * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only 181362306a36Sopenharmony_ci * rip out the [L4] (pgd), but for guests we shave off three pages. 181462306a36Sopenharmony_ci */ 181562306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(addr); i++) 181662306a36Sopenharmony_ci check_pt_base(&pt_base, &pt_end, addr[i]); 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci /* Our (by three pages) smaller Xen pagetable that we are using */ 181962306a36Sopenharmony_ci xen_pt_base = PFN_PHYS(pt_base); 182062306a36Sopenharmony_ci xen_pt_size = (pt_end - pt_base) * PAGE_SIZE; 182162306a36Sopenharmony_ci memblock_reserve(xen_pt_base, xen_pt_size); 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_ci /* Revector the xen_start_info */ 182462306a36Sopenharmony_ci xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); 182562306a36Sopenharmony_ci} 182662306a36Sopenharmony_ci 182762306a36Sopenharmony_ci/* 182862306a36Sopenharmony_ci * Read a value from a physical address. 
182962306a36Sopenharmony_ci */ 183062306a36Sopenharmony_cistatic unsigned long __init xen_read_phys_ulong(phys_addr_t addr) 183162306a36Sopenharmony_ci{ 183262306a36Sopenharmony_ci unsigned long *vaddr; 183362306a36Sopenharmony_ci unsigned long val; 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci vaddr = early_memremap_ro(addr, sizeof(val)); 183662306a36Sopenharmony_ci val = *vaddr; 183762306a36Sopenharmony_ci early_memunmap(vaddr, sizeof(val)); 183862306a36Sopenharmony_ci return val; 183962306a36Sopenharmony_ci} 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci/* 184262306a36Sopenharmony_ci * Translate a virtual address to a physical one without relying on mapped 184362306a36Sopenharmony_ci * page tables. Don't rely on big pages being aligned in (guest) physical 184462306a36Sopenharmony_ci * space! 184562306a36Sopenharmony_ci */ 184662306a36Sopenharmony_cistatic phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) 184762306a36Sopenharmony_ci{ 184862306a36Sopenharmony_ci phys_addr_t pa; 184962306a36Sopenharmony_ci pgd_t pgd; 185062306a36Sopenharmony_ci pud_t pud; 185162306a36Sopenharmony_ci pmd_t pmd; 185262306a36Sopenharmony_ci pte_t pte; 185362306a36Sopenharmony_ci 185462306a36Sopenharmony_ci pa = read_cr3_pa(); 185562306a36Sopenharmony_ci pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) * 185662306a36Sopenharmony_ci sizeof(pgd))); 185762306a36Sopenharmony_ci if (!pgd_present(pgd)) 185862306a36Sopenharmony_ci return 0; 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci pa = pgd_val(pgd) & PTE_PFN_MASK; 186162306a36Sopenharmony_ci pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) * 186262306a36Sopenharmony_ci sizeof(pud))); 186362306a36Sopenharmony_ci if (!pud_present(pud)) 186462306a36Sopenharmony_ci return 0; 186562306a36Sopenharmony_ci pa = pud_val(pud) & PTE_PFN_MASK; 186662306a36Sopenharmony_ci if (pud_large(pud)) 186762306a36Sopenharmony_ci return pa + (vaddr & ~PUD_MASK); 186862306a36Sopenharmony_ci 
186962306a36Sopenharmony_ci pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) * 187062306a36Sopenharmony_ci sizeof(pmd))); 187162306a36Sopenharmony_ci if (!pmd_present(pmd)) 187262306a36Sopenharmony_ci return 0; 187362306a36Sopenharmony_ci pa = pmd_val(pmd) & PTE_PFN_MASK; 187462306a36Sopenharmony_ci if (pmd_large(pmd)) 187562306a36Sopenharmony_ci return pa + (vaddr & ~PMD_MASK); 187662306a36Sopenharmony_ci 187762306a36Sopenharmony_ci pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) * 187862306a36Sopenharmony_ci sizeof(pte))); 187962306a36Sopenharmony_ci if (!pte_present(pte)) 188062306a36Sopenharmony_ci return 0; 188162306a36Sopenharmony_ci pa = pte_pfn(pte) << PAGE_SHIFT; 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci return pa | (vaddr & ~PAGE_MASK); 188462306a36Sopenharmony_ci} 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci/* 188762306a36Sopenharmony_ci * Find a new area for the hypervisor supplied p2m list and relocate the p2m to 188862306a36Sopenharmony_ci * this area. 
188962306a36Sopenharmony_ci */ 189062306a36Sopenharmony_civoid __init xen_relocate_p2m(void) 189162306a36Sopenharmony_ci{ 189262306a36Sopenharmony_ci phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys; 189362306a36Sopenharmony_ci unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; 189462306a36Sopenharmony_ci int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud; 189562306a36Sopenharmony_ci pte_t *pt; 189662306a36Sopenharmony_ci pmd_t *pmd; 189762306a36Sopenharmony_ci pud_t *pud; 189862306a36Sopenharmony_ci pgd_t *pgd; 189962306a36Sopenharmony_ci unsigned long *new_p2m; 190062306a36Sopenharmony_ci 190162306a36Sopenharmony_ci size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 190262306a36Sopenharmony_ci n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT; 190362306a36Sopenharmony_ci n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; 190462306a36Sopenharmony_ci n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; 190562306a36Sopenharmony_ci n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; 190662306a36Sopenharmony_ci n_frames = n_pte + n_pt + n_pmd + n_pud; 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci new_area = xen_find_free_area(PFN_PHYS(n_frames)); 190962306a36Sopenharmony_ci if (!new_area) { 191062306a36Sopenharmony_ci xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n"); 191162306a36Sopenharmony_ci BUG(); 191262306a36Sopenharmony_ci } 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci /* 191562306a36Sopenharmony_ci * Setup the page tables for addressing the new p2m list. 191662306a36Sopenharmony_ci * We have asked the hypervisor to map the p2m list at the user address 191762306a36Sopenharmony_ci * PUD_SIZE. It may have done so, or it may have used a kernel space 191862306a36Sopenharmony_ci * address depending on the Xen version. 191962306a36Sopenharmony_ci * To avoid any possible virtual address collision, just use 192062306a36Sopenharmony_ci * 2 * PUD_SIZE for the new area. 
192162306a36Sopenharmony_ci */ 192262306a36Sopenharmony_ci pud_phys = new_area; 192362306a36Sopenharmony_ci pmd_phys = pud_phys + PFN_PHYS(n_pud); 192462306a36Sopenharmony_ci pt_phys = pmd_phys + PFN_PHYS(n_pmd); 192562306a36Sopenharmony_ci p2m_pfn = PFN_DOWN(pt_phys) + n_pt; 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci pgd = __va(read_cr3_pa()); 192862306a36Sopenharmony_ci new_p2m = (unsigned long *)(2 * PGDIR_SIZE); 192962306a36Sopenharmony_ci for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { 193062306a36Sopenharmony_ci pud = early_memremap(pud_phys, PAGE_SIZE); 193162306a36Sopenharmony_ci clear_page(pud); 193262306a36Sopenharmony_ci for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); 193362306a36Sopenharmony_ci idx_pmd++) { 193462306a36Sopenharmony_ci pmd = early_memremap(pmd_phys, PAGE_SIZE); 193562306a36Sopenharmony_ci clear_page(pmd); 193662306a36Sopenharmony_ci for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); 193762306a36Sopenharmony_ci idx_pt++) { 193862306a36Sopenharmony_ci pt = early_memremap(pt_phys, PAGE_SIZE); 193962306a36Sopenharmony_ci clear_page(pt); 194062306a36Sopenharmony_ci for (idx_pte = 0; 194162306a36Sopenharmony_ci idx_pte < min(n_pte, PTRS_PER_PTE); 194262306a36Sopenharmony_ci idx_pte++) { 194362306a36Sopenharmony_ci pt[idx_pte] = pfn_pte(p2m_pfn, 194462306a36Sopenharmony_ci PAGE_KERNEL); 194562306a36Sopenharmony_ci p2m_pfn++; 194662306a36Sopenharmony_ci } 194762306a36Sopenharmony_ci n_pte -= PTRS_PER_PTE; 194862306a36Sopenharmony_ci early_memunmap(pt, PAGE_SIZE); 194962306a36Sopenharmony_ci make_lowmem_page_readonly(__va(pt_phys)); 195062306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, 195162306a36Sopenharmony_ci PFN_DOWN(pt_phys)); 195262306a36Sopenharmony_ci pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys); 195362306a36Sopenharmony_ci pt_phys += PAGE_SIZE; 195462306a36Sopenharmony_ci } 195562306a36Sopenharmony_ci n_pt -= PTRS_PER_PMD; 195662306a36Sopenharmony_ci early_memunmap(pmd, PAGE_SIZE); 
195762306a36Sopenharmony_ci make_lowmem_page_readonly(__va(pmd_phys)); 195862306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, 195962306a36Sopenharmony_ci PFN_DOWN(pmd_phys)); 196062306a36Sopenharmony_ci pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys); 196162306a36Sopenharmony_ci pmd_phys += PAGE_SIZE; 196262306a36Sopenharmony_ci } 196362306a36Sopenharmony_ci n_pmd -= PTRS_PER_PUD; 196462306a36Sopenharmony_ci early_memunmap(pud, PAGE_SIZE); 196562306a36Sopenharmony_ci make_lowmem_page_readonly(__va(pud_phys)); 196662306a36Sopenharmony_ci pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); 196762306a36Sopenharmony_ci set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); 196862306a36Sopenharmony_ci pud_phys += PAGE_SIZE; 196962306a36Sopenharmony_ci } 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci /* Now copy the old p2m info to the new area. */ 197262306a36Sopenharmony_ci memcpy(new_p2m, xen_p2m_addr, size); 197362306a36Sopenharmony_ci xen_p2m_addr = new_p2m; 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci /* Release the old p2m list and set new list info. 
*/ 197662306a36Sopenharmony_ci p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list)); 197762306a36Sopenharmony_ci BUG_ON(!p2m_pfn); 197862306a36Sopenharmony_ci p2m_pfn_end = p2m_pfn + PFN_DOWN(size); 197962306a36Sopenharmony_ci 198062306a36Sopenharmony_ci if (xen_start_info->mfn_list < __START_KERNEL_map) { 198162306a36Sopenharmony_ci pfn = xen_start_info->first_p2m_pfn; 198262306a36Sopenharmony_ci pfn_end = xen_start_info->first_p2m_pfn + 198362306a36Sopenharmony_ci xen_start_info->nr_p2m_frames; 198462306a36Sopenharmony_ci set_pgd(pgd + 1, __pgd(0)); 198562306a36Sopenharmony_ci } else { 198662306a36Sopenharmony_ci pfn = p2m_pfn; 198762306a36Sopenharmony_ci pfn_end = p2m_pfn_end; 198862306a36Sopenharmony_ci } 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn)); 199162306a36Sopenharmony_ci while (pfn < pfn_end) { 199262306a36Sopenharmony_ci if (pfn == p2m_pfn) { 199362306a36Sopenharmony_ci pfn = p2m_pfn_end; 199462306a36Sopenharmony_ci continue; 199562306a36Sopenharmony_ci } 199662306a36Sopenharmony_ci make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 199762306a36Sopenharmony_ci pfn++; 199862306a36Sopenharmony_ci } 199962306a36Sopenharmony_ci 200062306a36Sopenharmony_ci xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; 200162306a36Sopenharmony_ci xen_start_info->first_p2m_pfn = PFN_DOWN(new_area); 200262306a36Sopenharmony_ci xen_start_info->nr_p2m_frames = n_frames; 200362306a36Sopenharmony_ci} 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_civoid __init xen_reserve_special_pages(void) 200662306a36Sopenharmony_ci{ 200762306a36Sopenharmony_ci phys_addr_t paddr; 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci memblock_reserve(__pa(xen_start_info), PAGE_SIZE); 201062306a36Sopenharmony_ci if (xen_start_info->store_mfn) { 201162306a36Sopenharmony_ci paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn)); 201262306a36Sopenharmony_ci memblock_reserve(paddr, PAGE_SIZE); 
201362306a36Sopenharmony_ci } 201462306a36Sopenharmony_ci if (!xen_initial_domain()) { 201562306a36Sopenharmony_ci paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn)); 201662306a36Sopenharmony_ci memblock_reserve(paddr, PAGE_SIZE); 201762306a36Sopenharmony_ci } 201862306a36Sopenharmony_ci} 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_civoid __init xen_pt_check_e820(void) 202162306a36Sopenharmony_ci{ 202262306a36Sopenharmony_ci if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) { 202362306a36Sopenharmony_ci xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n"); 202462306a36Sopenharmony_ci BUG(); 202562306a36Sopenharmony_ci } 202662306a36Sopenharmony_ci} 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_cistatic unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_cistatic void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) 203162306a36Sopenharmony_ci{ 203262306a36Sopenharmony_ci pte_t pte; 203362306a36Sopenharmony_ci unsigned long vaddr; 203462306a36Sopenharmony_ci 203562306a36Sopenharmony_ci phys >>= PAGE_SHIFT; 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ci switch (idx) { 203862306a36Sopenharmony_ci case FIX_BTMAP_END ... 
FIX_BTMAP_BEGIN: 203962306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 204062306a36Sopenharmony_ci case VSYSCALL_PAGE: 204162306a36Sopenharmony_ci#endif 204262306a36Sopenharmony_ci /* All local page mappings */ 204362306a36Sopenharmony_ci pte = pfn_pte(phys, prot); 204462306a36Sopenharmony_ci break; 204562306a36Sopenharmony_ci 204662306a36Sopenharmony_ci#ifdef CONFIG_X86_LOCAL_APIC 204762306a36Sopenharmony_ci case FIX_APIC_BASE: /* maps dummy local APIC */ 204862306a36Sopenharmony_ci pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); 204962306a36Sopenharmony_ci break; 205062306a36Sopenharmony_ci#endif 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_ci#ifdef CONFIG_X86_IO_APIC 205362306a36Sopenharmony_ci case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: 205462306a36Sopenharmony_ci /* 205562306a36Sopenharmony_ci * We just don't map the IO APIC - all access is via 205662306a36Sopenharmony_ci * hypercalls. Keep the address in the pte for reference. 205762306a36Sopenharmony_ci */ 205862306a36Sopenharmony_ci pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); 205962306a36Sopenharmony_ci break; 206062306a36Sopenharmony_ci#endif 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci case FIX_PARAVIRT_BOOTMAP: 206362306a36Sopenharmony_ci /* This is an MFN, but it isn't an IO mapping from the 206462306a36Sopenharmony_ci IO domain */ 206562306a36Sopenharmony_ci pte = mfn_pte(phys, prot); 206662306a36Sopenharmony_ci break; 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci default: 206962306a36Sopenharmony_ci /* By default, set_fixmap is used for hardware mappings */ 207062306a36Sopenharmony_ci pte = mfn_pte(phys, prot); 207162306a36Sopenharmony_ci break; 207262306a36Sopenharmony_ci } 207362306a36Sopenharmony_ci 207462306a36Sopenharmony_ci vaddr = __fix_to_virt(idx); 207562306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG)) 207662306a36Sopenharmony_ci BUG(); 207762306a36Sopenharmony_ci 
207862306a36Sopenharmony_ci#ifdef CONFIG_X86_VSYSCALL_EMULATION 207962306a36Sopenharmony_ci /* Replicate changes to map the vsyscall page into the user 208062306a36Sopenharmony_ci pagetable vsyscall mapping. */ 208162306a36Sopenharmony_ci if (idx == VSYSCALL_PAGE) 208262306a36Sopenharmony_ci set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); 208362306a36Sopenharmony_ci#endif 208462306a36Sopenharmony_ci} 208562306a36Sopenharmony_ci 208662306a36Sopenharmony_cistatic void xen_enter_lazy_mmu(void) 208762306a36Sopenharmony_ci{ 208862306a36Sopenharmony_ci enter_lazy(XEN_LAZY_MMU); 208962306a36Sopenharmony_ci} 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_cistatic void xen_flush_lazy_mmu(void) 209262306a36Sopenharmony_ci{ 209362306a36Sopenharmony_ci preempt_disable(); 209462306a36Sopenharmony_ci 209562306a36Sopenharmony_ci if (xen_get_lazy_mode() == XEN_LAZY_MMU) { 209662306a36Sopenharmony_ci arch_leave_lazy_mmu_mode(); 209762306a36Sopenharmony_ci arch_enter_lazy_mmu_mode(); 209862306a36Sopenharmony_ci } 209962306a36Sopenharmony_ci 210062306a36Sopenharmony_ci preempt_enable(); 210162306a36Sopenharmony_ci} 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_cistatic void __init xen_post_allocator_init(void) 210462306a36Sopenharmony_ci{ 210562306a36Sopenharmony_ci pv_ops.mmu.set_pte = xen_set_pte; 210662306a36Sopenharmony_ci pv_ops.mmu.set_pmd = xen_set_pmd; 210762306a36Sopenharmony_ci pv_ops.mmu.set_pud = xen_set_pud; 210862306a36Sopenharmony_ci pv_ops.mmu.set_p4d = xen_set_p4d; 210962306a36Sopenharmony_ci 211062306a36Sopenharmony_ci /* This will work as long as patching hasn't happened yet 211162306a36Sopenharmony_ci (which it hasn't) */ 211262306a36Sopenharmony_ci pv_ops.mmu.alloc_pte = xen_alloc_pte; 211362306a36Sopenharmony_ci pv_ops.mmu.alloc_pmd = xen_alloc_pmd; 211462306a36Sopenharmony_ci pv_ops.mmu.release_pte = xen_release_pte; 211562306a36Sopenharmony_ci pv_ops.mmu.release_pmd = xen_release_pmd; 211662306a36Sopenharmony_ci pv_ops.mmu.alloc_pud = 
xen_alloc_pud; 211762306a36Sopenharmony_ci pv_ops.mmu.release_pud = xen_release_pud; 211862306a36Sopenharmony_ci pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci pv_ops.mmu.write_cr3 = &xen_write_cr3; 212162306a36Sopenharmony_ci} 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_cistatic void xen_leave_lazy_mmu(void) 212462306a36Sopenharmony_ci{ 212562306a36Sopenharmony_ci preempt_disable(); 212662306a36Sopenharmony_ci xen_mc_flush(); 212762306a36Sopenharmony_ci leave_lazy(XEN_LAZY_MMU); 212862306a36Sopenharmony_ci preempt_enable(); 212962306a36Sopenharmony_ci} 213062306a36Sopenharmony_ci 213162306a36Sopenharmony_cistatic const typeof(pv_ops) xen_mmu_ops __initconst = { 213262306a36Sopenharmony_ci .mmu = { 213362306a36Sopenharmony_ci .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2), 213462306a36Sopenharmony_ci .write_cr2 = xen_write_cr2, 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_ci .read_cr3 = xen_read_cr3, 213762306a36Sopenharmony_ci .write_cr3 = xen_write_cr3_init, 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci .flush_tlb_user = xen_flush_tlb, 214062306a36Sopenharmony_ci .flush_tlb_kernel = xen_flush_tlb, 214162306a36Sopenharmony_ci .flush_tlb_one_user = xen_flush_tlb_one_user, 214262306a36Sopenharmony_ci .flush_tlb_multi = xen_flush_tlb_multi, 214362306a36Sopenharmony_ci .tlb_remove_table = tlb_remove_table, 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci .pgd_alloc = xen_pgd_alloc, 214662306a36Sopenharmony_ci .pgd_free = xen_pgd_free, 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci .alloc_pte = xen_alloc_pte_init, 214962306a36Sopenharmony_ci .release_pte = xen_release_pte_init, 215062306a36Sopenharmony_ci .alloc_pmd = xen_alloc_pmd_init, 215162306a36Sopenharmony_ci .release_pmd = xen_release_pmd_init, 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci .set_pte = xen_set_pte_init, 215462306a36Sopenharmony_ci .set_pmd = xen_set_pmd_hyper, 
215562306a36Sopenharmony_ci 215662306a36Sopenharmony_ci .ptep_modify_prot_start = xen_ptep_modify_prot_start, 215762306a36Sopenharmony_ci .ptep_modify_prot_commit = xen_ptep_modify_prot_commit, 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci .pte_val = PV_CALLEE_SAVE(xen_pte_val), 216062306a36Sopenharmony_ci .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), 216362306a36Sopenharmony_ci .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci .set_pud = xen_set_pud_hyper, 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), 216862306a36Sopenharmony_ci .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), 216962306a36Sopenharmony_ci 217062306a36Sopenharmony_ci .pud_val = PV_CALLEE_SAVE(xen_pud_val), 217162306a36Sopenharmony_ci .make_pud = PV_CALLEE_SAVE(xen_make_pud), 217262306a36Sopenharmony_ci .set_p4d = xen_set_p4d_hyper, 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci .alloc_pud = xen_alloc_pmd_init, 217562306a36Sopenharmony_ci .release_pud = xen_release_pmd_init, 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci#if CONFIG_PGTABLE_LEVELS >= 5 217862306a36Sopenharmony_ci .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), 217962306a36Sopenharmony_ci .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), 218062306a36Sopenharmony_ci#endif 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ci .enter_mmap = xen_enter_mmap, 218362306a36Sopenharmony_ci .exit_mmap = xen_exit_mmap, 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ci .lazy_mode = { 218662306a36Sopenharmony_ci .enter = xen_enter_lazy_mmu, 218762306a36Sopenharmony_ci .leave = xen_leave_lazy_mmu, 218862306a36Sopenharmony_ci .flush = xen_flush_lazy_mmu, 218962306a36Sopenharmony_ci }, 219062306a36Sopenharmony_ci 219162306a36Sopenharmony_ci .set_fixmap = xen_set_fixmap, 219262306a36Sopenharmony_ci }, 219362306a36Sopenharmony_ci}; 
219462306a36Sopenharmony_ci 219562306a36Sopenharmony_civoid __init xen_init_mmu_ops(void) 219662306a36Sopenharmony_ci{ 219762306a36Sopenharmony_ci x86_init.paging.pagetable_init = xen_pagetable_init; 219862306a36Sopenharmony_ci x86_init.hyper.init_after_bootmem = xen_after_bootmem; 219962306a36Sopenharmony_ci 220062306a36Sopenharmony_ci pv_ops.mmu = xen_mmu_ops.mmu; 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci memset(dummy_mapping, 0xff, PAGE_SIZE); 220362306a36Sopenharmony_ci} 220462306a36Sopenharmony_ci 220562306a36Sopenharmony_ci/* Protected by xen_reservation_lock. */ 220662306a36Sopenharmony_ci#define MAX_CONTIG_ORDER 9 /* 2MB */ 220762306a36Sopenharmony_cistatic unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci#define VOID_PTE (mfn_pte(0, __pgprot(0))) 221062306a36Sopenharmony_cistatic void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, 221162306a36Sopenharmony_ci unsigned long *in_frames, 221262306a36Sopenharmony_ci unsigned long *out_frames) 221362306a36Sopenharmony_ci{ 221462306a36Sopenharmony_ci int i; 221562306a36Sopenharmony_ci struct multicall_space mcs; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci xen_mc_batch(); 221862306a36Sopenharmony_ci for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) { 221962306a36Sopenharmony_ci mcs = __xen_mc_entry(0); 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci if (in_frames) 222262306a36Sopenharmony_ci in_frames[i] = virt_to_mfn((void *)vaddr); 222362306a36Sopenharmony_ci 222462306a36Sopenharmony_ci MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); 222562306a36Sopenharmony_ci __set_phys_to_machine(virt_to_pfn((void *)vaddr), INVALID_P2M_ENTRY); 222662306a36Sopenharmony_ci 222762306a36Sopenharmony_ci if (out_frames) 222862306a36Sopenharmony_ci out_frames[i] = virt_to_pfn((void *)vaddr); 222962306a36Sopenharmony_ci } 223062306a36Sopenharmony_ci xen_mc_issue(0); 223162306a36Sopenharmony_ci} 
223262306a36Sopenharmony_ci 223362306a36Sopenharmony_ci/* 223462306a36Sopenharmony_ci * Update the pfn-to-mfn mappings for a virtual address range, either to 223562306a36Sopenharmony_ci * point to an array of mfns, or contiguously from a single starting 223662306a36Sopenharmony_ci * mfn. 223762306a36Sopenharmony_ci */ 223862306a36Sopenharmony_cistatic void xen_remap_exchanged_ptes(unsigned long vaddr, int order, 223962306a36Sopenharmony_ci unsigned long *mfns, 224062306a36Sopenharmony_ci unsigned long first_mfn) 224162306a36Sopenharmony_ci{ 224262306a36Sopenharmony_ci unsigned i, limit; 224362306a36Sopenharmony_ci unsigned long mfn; 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci xen_mc_batch(); 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci limit = 1u << order; 224862306a36Sopenharmony_ci for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) { 224962306a36Sopenharmony_ci struct multicall_space mcs; 225062306a36Sopenharmony_ci unsigned flags; 225162306a36Sopenharmony_ci 225262306a36Sopenharmony_ci mcs = __xen_mc_entry(0); 225362306a36Sopenharmony_ci if (mfns) 225462306a36Sopenharmony_ci mfn = mfns[i]; 225562306a36Sopenharmony_ci else 225662306a36Sopenharmony_ci mfn = first_mfn + i; 225762306a36Sopenharmony_ci 225862306a36Sopenharmony_ci if (i < (limit - 1)) 225962306a36Sopenharmony_ci flags = 0; 226062306a36Sopenharmony_ci else { 226162306a36Sopenharmony_ci if (order == 0) 226262306a36Sopenharmony_ci flags = UVMF_INVLPG | UVMF_ALL; 226362306a36Sopenharmony_ci else 226462306a36Sopenharmony_ci flags = UVMF_TLB_FLUSH | UVMF_ALL; 226562306a36Sopenharmony_ci } 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci MULTI_update_va_mapping(mcs.mc, vaddr, 226862306a36Sopenharmony_ci mfn_pte(mfn, PAGE_KERNEL), flags); 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci set_phys_to_machine(virt_to_pfn((void *)vaddr), mfn); 227162306a36Sopenharmony_ci } 227262306a36Sopenharmony_ci 227362306a36Sopenharmony_ci xen_mc_issue(0); 227462306a36Sopenharmony_ci} 
227562306a36Sopenharmony_ci 227662306a36Sopenharmony_ci/* 227762306a36Sopenharmony_ci * Perform the hypercall to exchange a region of our pfns to point to 227862306a36Sopenharmony_ci * memory with the required contiguous alignment. Takes the pfns as 227962306a36Sopenharmony_ci * input, and populates mfns as output. 228062306a36Sopenharmony_ci * 228162306a36Sopenharmony_ci * Returns a success code indicating whether the hypervisor was able to 228262306a36Sopenharmony_ci * satisfy the request or not. 228362306a36Sopenharmony_ci */ 228462306a36Sopenharmony_cistatic int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, 228562306a36Sopenharmony_ci unsigned long *pfns_in, 228662306a36Sopenharmony_ci unsigned long extents_out, 228762306a36Sopenharmony_ci unsigned int order_out, 228862306a36Sopenharmony_ci unsigned long *mfns_out, 228962306a36Sopenharmony_ci unsigned int address_bits) 229062306a36Sopenharmony_ci{ 229162306a36Sopenharmony_ci long rc; 229262306a36Sopenharmony_ci int success; 229362306a36Sopenharmony_ci 229462306a36Sopenharmony_ci struct xen_memory_exchange exchange = { 229562306a36Sopenharmony_ci .in = { 229662306a36Sopenharmony_ci .nr_extents = extents_in, 229762306a36Sopenharmony_ci .extent_order = order_in, 229862306a36Sopenharmony_ci .extent_start = pfns_in, 229962306a36Sopenharmony_ci .domid = DOMID_SELF 230062306a36Sopenharmony_ci }, 230162306a36Sopenharmony_ci .out = { 230262306a36Sopenharmony_ci .nr_extents = extents_out, 230362306a36Sopenharmony_ci .extent_order = order_out, 230462306a36Sopenharmony_ci .extent_start = mfns_out, 230562306a36Sopenharmony_ci .address_bits = address_bits, 230662306a36Sopenharmony_ci .domid = DOMID_SELF 230762306a36Sopenharmony_ci } 230862306a36Sopenharmony_ci }; 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_ci BUG_ON(extents_in << order_in != extents_out << order_out); 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); 
231362306a36Sopenharmony_ci success = (exchange.nr_exchanged == extents_in); 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); 231662306a36Sopenharmony_ci BUG_ON(success && (rc != 0)); 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci return success; 231962306a36Sopenharmony_ci} 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ciint xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, 232262306a36Sopenharmony_ci unsigned int address_bits, 232362306a36Sopenharmony_ci dma_addr_t *dma_handle) 232462306a36Sopenharmony_ci{ 232562306a36Sopenharmony_ci unsigned long *in_frames = discontig_frames, out_frame; 232662306a36Sopenharmony_ci unsigned long flags; 232762306a36Sopenharmony_ci int success; 232862306a36Sopenharmony_ci unsigned long vstart = (unsigned long)phys_to_virt(pstart); 232962306a36Sopenharmony_ci 233062306a36Sopenharmony_ci if (unlikely(order > MAX_CONTIG_ORDER)) 233162306a36Sopenharmony_ci return -ENOMEM; 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci memset((void *) vstart, 0, PAGE_SIZE << order); 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_ci spin_lock_irqsave(&xen_reservation_lock, flags); 233662306a36Sopenharmony_ci 233762306a36Sopenharmony_ci /* 1. Zap current PTEs, remembering MFNs. */ 233862306a36Sopenharmony_ci xen_zap_pfn_range(vstart, order, in_frames, NULL); 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci /* 2. Get a new contiguous memory extent. */ 234162306a36Sopenharmony_ci out_frame = virt_to_pfn((void *)vstart); 234262306a36Sopenharmony_ci success = xen_exchange_memory(1UL << order, 0, in_frames, 234362306a36Sopenharmony_ci 1, order, &out_frame, 234462306a36Sopenharmony_ci address_bits); 234562306a36Sopenharmony_ci 234662306a36Sopenharmony_ci /* 3. Map the new extent in place of old pages. 
*/ 234762306a36Sopenharmony_ci if (success) 234862306a36Sopenharmony_ci xen_remap_exchanged_ptes(vstart, order, NULL, out_frame); 234962306a36Sopenharmony_ci else 235062306a36Sopenharmony_ci xen_remap_exchanged_ptes(vstart, order, in_frames, 0); 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci spin_unlock_irqrestore(&xen_reservation_lock, flags); 235362306a36Sopenharmony_ci 235462306a36Sopenharmony_ci *dma_handle = virt_to_machine(vstart).maddr; 235562306a36Sopenharmony_ci return success ? 0 : -ENOMEM; 235662306a36Sopenharmony_ci} 235762306a36Sopenharmony_ci 235862306a36Sopenharmony_civoid xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) 235962306a36Sopenharmony_ci{ 236062306a36Sopenharmony_ci unsigned long *out_frames = discontig_frames, in_frame; 236162306a36Sopenharmony_ci unsigned long flags; 236262306a36Sopenharmony_ci int success; 236362306a36Sopenharmony_ci unsigned long vstart; 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci if (unlikely(order > MAX_CONTIG_ORDER)) 236662306a36Sopenharmony_ci return; 236762306a36Sopenharmony_ci 236862306a36Sopenharmony_ci vstart = (unsigned long)phys_to_virt(pstart); 236962306a36Sopenharmony_ci memset((void *) vstart, 0, PAGE_SIZE << order); 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci spin_lock_irqsave(&xen_reservation_lock, flags); 237262306a36Sopenharmony_ci 237362306a36Sopenharmony_ci /* 1. Find start MFN of contiguous extent. */ 237462306a36Sopenharmony_ci in_frame = virt_to_mfn((void *)vstart); 237562306a36Sopenharmony_ci 237662306a36Sopenharmony_ci /* 2. Zap current PTEs. */ 237762306a36Sopenharmony_ci xen_zap_pfn_range(vstart, order, NULL, out_frames); 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_ci /* 3. Do the exchange for non-contiguous MFNs. */ 238062306a36Sopenharmony_ci success = xen_exchange_memory(1, order, &in_frame, 1UL << order, 238162306a36Sopenharmony_ci 0, out_frames, 0); 238262306a36Sopenharmony_ci 238362306a36Sopenharmony_ci /* 4. 
Map new pages in place of old pages. */ 238462306a36Sopenharmony_ci if (success) 238562306a36Sopenharmony_ci xen_remap_exchanged_ptes(vstart, order, out_frames, 0); 238662306a36Sopenharmony_ci else 238762306a36Sopenharmony_ci xen_remap_exchanged_ptes(vstart, order, NULL, in_frame); 238862306a36Sopenharmony_ci 238962306a36Sopenharmony_ci spin_unlock_irqrestore(&xen_reservation_lock, flags); 239062306a36Sopenharmony_ci} 239162306a36Sopenharmony_ci 239262306a36Sopenharmony_cistatic noinline void xen_flush_tlb_all(void) 239362306a36Sopenharmony_ci{ 239462306a36Sopenharmony_ci struct mmuext_op *op; 239562306a36Sopenharmony_ci struct multicall_space mcs; 239662306a36Sopenharmony_ci 239762306a36Sopenharmony_ci preempt_disable(); 239862306a36Sopenharmony_ci 239962306a36Sopenharmony_ci mcs = xen_mc_entry(sizeof(*op)); 240062306a36Sopenharmony_ci 240162306a36Sopenharmony_ci op = mcs.args; 240262306a36Sopenharmony_ci op->cmd = MMUEXT_TLB_FLUSH_ALL; 240362306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_MMU); 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci preempt_enable(); 240862306a36Sopenharmony_ci} 240962306a36Sopenharmony_ci 241062306a36Sopenharmony_ci#define REMAP_BATCH_SIZE 16 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_cistruct remap_data { 241362306a36Sopenharmony_ci xen_pfn_t *pfn; 241462306a36Sopenharmony_ci bool contiguous; 241562306a36Sopenharmony_ci bool no_translate; 241662306a36Sopenharmony_ci pgprot_t prot; 241762306a36Sopenharmony_ci struct mmu_update *mmu_update; 241862306a36Sopenharmony_ci}; 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_cistatic int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) 242162306a36Sopenharmony_ci{ 242262306a36Sopenharmony_ci struct remap_data *rmd = data; 242362306a36Sopenharmony_ci pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); 242462306a36Sopenharmony_ci 
242562306a36Sopenharmony_ci /* 242662306a36Sopenharmony_ci * If we have a contiguous range, just update the pfn itself, 242762306a36Sopenharmony_ci * else update pointer to be "next pfn". 242862306a36Sopenharmony_ci */ 242962306a36Sopenharmony_ci if (rmd->contiguous) 243062306a36Sopenharmony_ci (*rmd->pfn)++; 243162306a36Sopenharmony_ci else 243262306a36Sopenharmony_ci rmd->pfn++; 243362306a36Sopenharmony_ci 243462306a36Sopenharmony_ci rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; 243562306a36Sopenharmony_ci rmd->mmu_update->ptr |= rmd->no_translate ? 243662306a36Sopenharmony_ci MMU_PT_UPDATE_NO_TRANSLATE : 243762306a36Sopenharmony_ci MMU_NORMAL_PT_UPDATE; 243862306a36Sopenharmony_ci rmd->mmu_update->val = pte_val_ma(pte); 243962306a36Sopenharmony_ci rmd->mmu_update++; 244062306a36Sopenharmony_ci 244162306a36Sopenharmony_ci return 0; 244262306a36Sopenharmony_ci} 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ciint xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, 244562306a36Sopenharmony_ci xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, 244662306a36Sopenharmony_ci unsigned int domid, bool no_translate) 244762306a36Sopenharmony_ci{ 244862306a36Sopenharmony_ci int err = 0; 244962306a36Sopenharmony_ci struct remap_data rmd; 245062306a36Sopenharmony_ci struct mmu_update mmu_update[REMAP_BATCH_SIZE]; 245162306a36Sopenharmony_ci unsigned long range; 245262306a36Sopenharmony_ci int mapped = 0; 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci rmd.pfn = pfn; 245762306a36Sopenharmony_ci rmd.prot = prot; 245862306a36Sopenharmony_ci /* 245962306a36Sopenharmony_ci * We use the err_ptr to indicate if there we are doing a contiguous 246062306a36Sopenharmony_ci * mapping or a discontiguous mapping. 
246162306a36Sopenharmony_ci */ 246262306a36Sopenharmony_ci rmd.contiguous = !err_ptr; 246362306a36Sopenharmony_ci rmd.no_translate = no_translate; 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci while (nr) { 246662306a36Sopenharmony_ci int index = 0; 246762306a36Sopenharmony_ci int done = 0; 246862306a36Sopenharmony_ci int batch = min(REMAP_BATCH_SIZE, nr); 246962306a36Sopenharmony_ci int batch_left = batch; 247062306a36Sopenharmony_ci 247162306a36Sopenharmony_ci range = (unsigned long)batch << PAGE_SHIFT; 247262306a36Sopenharmony_ci 247362306a36Sopenharmony_ci rmd.mmu_update = mmu_update; 247462306a36Sopenharmony_ci err = apply_to_page_range(vma->vm_mm, addr, range, 247562306a36Sopenharmony_ci remap_area_pfn_pte_fn, &rmd); 247662306a36Sopenharmony_ci if (err) 247762306a36Sopenharmony_ci goto out; 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_ci /* 248062306a36Sopenharmony_ci * We record the error for each page that gives an error, but 248162306a36Sopenharmony_ci * continue mapping until the whole set is done 248262306a36Sopenharmony_ci */ 248362306a36Sopenharmony_ci do { 248462306a36Sopenharmony_ci int i; 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci err = HYPERVISOR_mmu_update(&mmu_update[index], 248762306a36Sopenharmony_ci batch_left, &done, domid); 248862306a36Sopenharmony_ci 248962306a36Sopenharmony_ci /* 249062306a36Sopenharmony_ci * @err_ptr may be the same buffer as @gfn, so 249162306a36Sopenharmony_ci * only clear it after each chunk of @gfn is 249262306a36Sopenharmony_ci * used. 249362306a36Sopenharmony_ci */ 249462306a36Sopenharmony_ci if (err_ptr) { 249562306a36Sopenharmony_ci for (i = index; i < index + done; i++) 249662306a36Sopenharmony_ci err_ptr[i] = 0; 249762306a36Sopenharmony_ci } 249862306a36Sopenharmony_ci if (err < 0) { 249962306a36Sopenharmony_ci if (!err_ptr) 250062306a36Sopenharmony_ci goto out; 250162306a36Sopenharmony_ci err_ptr[i] = err; 250262306a36Sopenharmony_ci done++; /* Skip failed frame. 
*/ 250362306a36Sopenharmony_ci } else 250462306a36Sopenharmony_ci mapped += done; 250562306a36Sopenharmony_ci batch_left -= done; 250662306a36Sopenharmony_ci index += done; 250762306a36Sopenharmony_ci } while (batch_left); 250862306a36Sopenharmony_ci 250962306a36Sopenharmony_ci nr -= batch; 251062306a36Sopenharmony_ci addr += range; 251162306a36Sopenharmony_ci if (err_ptr) 251262306a36Sopenharmony_ci err_ptr += batch; 251362306a36Sopenharmony_ci cond_resched(); 251462306a36Sopenharmony_ci } 251562306a36Sopenharmony_ciout: 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci xen_flush_tlb_all(); 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_ci return err < 0 ? err : mapped; 252062306a36Sopenharmony_ci} 252162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(xen_remap_pfn); 252262306a36Sopenharmony_ci 252362306a36Sopenharmony_ci#ifdef CONFIG_KEXEC_CORE 252462306a36Sopenharmony_ciphys_addr_t paddr_vmcoreinfo_note(void) 252562306a36Sopenharmony_ci{ 252662306a36Sopenharmony_ci if (xen_pv_domain()) 252762306a36Sopenharmony_ci return virt_to_machine(vmcoreinfo_note).maddr; 252862306a36Sopenharmony_ci else 252962306a36Sopenharmony_ci return __pa(vmcoreinfo_note); 253062306a36Sopenharmony_ci} 253162306a36Sopenharmony_ci#endif /* CONFIG_KEXEC_CORE */ 2532