162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Machine specific setup for xen 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/init.h> 962306a36Sopenharmony_ci#include <linux/iscsi_ibft.h> 1062306a36Sopenharmony_ci#include <linux/sched.h> 1162306a36Sopenharmony_ci#include <linux/kstrtox.h> 1262306a36Sopenharmony_ci#include <linux/mm.h> 1362306a36Sopenharmony_ci#include <linux/pm.h> 1462306a36Sopenharmony_ci#include <linux/memblock.h> 1562306a36Sopenharmony_ci#include <linux/cpuidle.h> 1662306a36Sopenharmony_ci#include <linux/cpufreq.h> 1762306a36Sopenharmony_ci#include <linux/memory_hotplug.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include <asm/elf.h> 2062306a36Sopenharmony_ci#include <asm/vdso.h> 2162306a36Sopenharmony_ci#include <asm/e820/api.h> 2262306a36Sopenharmony_ci#include <asm/setup.h> 2362306a36Sopenharmony_ci#include <asm/acpi.h> 2462306a36Sopenharmony_ci#include <asm/numa.h> 2562306a36Sopenharmony_ci#include <asm/idtentry.h> 2662306a36Sopenharmony_ci#include <asm/xen/hypervisor.h> 2762306a36Sopenharmony_ci#include <asm/xen/hypercall.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#include <xen/xen.h> 3062306a36Sopenharmony_ci#include <xen/page.h> 3162306a36Sopenharmony_ci#include <xen/interface/callback.h> 3262306a36Sopenharmony_ci#include <xen/interface/memory.h> 3362306a36Sopenharmony_ci#include <xen/interface/physdev.h> 3462306a36Sopenharmony_ci#include <xen/features.h> 3562306a36Sopenharmony_ci#include <xen/hvc-console.h> 3662306a36Sopenharmony_ci#include "xen-ops.h" 3762306a36Sopenharmony_ci#include "mmu.h" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci/* Amount of extra memory space we add to the 
e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 *
 * Each page mapped here holds one record of a singly linked list which is
 * built by xen_do_set_identity_and_remap_chunk() and consumed by
 * xen_remap_memory(). REMAP_SIZE leaves room for the three header words that
 * precede mfns[].
 * NOTE(review): presumably P2M_PER_PAGE is the number of unsigned longs per
 * page, so that one record fills exactly one page - confirm.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	/* MFN of the next record in the list (the previous list head). */
	unsigned long	next_area_mfn;
	/* First PFN the entries of mfns[] are to be remapped to. */
	unsigned long	target_pfn;
	/* Number of valid entries in mfns[]. */
	unsigned long	size;
	/* MFNs to remap; mfns[0] is the page holding this very record. */
	unsigned long	mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
/* Head of the remap list; INVALID_P2M_ENTRY as long as the list is empty. */
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;

/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
7362306a36Sopenharmony_ci * 7462306a36Sopenharmony_ci * 10x seems like a reasonable balance between scaling flexibility and 7562306a36Sopenharmony_ci * leaving a practically usable system. 7662306a36Sopenharmony_ci */ 7762306a36Sopenharmony_ci#define EXTRA_MEM_RATIO (10) 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistatic bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_cistatic void __init xen_parse_512gb(void) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci bool val = false; 8462306a36Sopenharmony_ci char *arg; 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit"); 8762306a36Sopenharmony_ci if (!arg) 8862306a36Sopenharmony_ci return; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); 9162306a36Sopenharmony_ci if (!arg) 9262306a36Sopenharmony_ci val = true; 9362306a36Sopenharmony_ci else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val)) 9462306a36Sopenharmony_ci return; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci xen_512gb_limit = val; 9762306a36Sopenharmony_ci} 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_cistatic void __init xen_add_extra_mem(unsigned long start_pfn, 10062306a36Sopenharmony_ci unsigned long n_pfns) 10162306a36Sopenharmony_ci{ 10262306a36Sopenharmony_ci int i; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci /* 10562306a36Sopenharmony_ci * No need to check for zero size, should happen rarely and will only 10662306a36Sopenharmony_ci * write a new entry regarded to be unused due to zero size. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_ci for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 10962306a36Sopenharmony_ci /* Add new region. 
*/ 11062306a36Sopenharmony_ci if (xen_extra_mem[i].n_pfns == 0) { 11162306a36Sopenharmony_ci xen_extra_mem[i].start_pfn = start_pfn; 11262306a36Sopenharmony_ci xen_extra_mem[i].n_pfns = n_pfns; 11362306a36Sopenharmony_ci break; 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci /* Append to existing region. */ 11662306a36Sopenharmony_ci if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns == 11762306a36Sopenharmony_ci start_pfn) { 11862306a36Sopenharmony_ci xen_extra_mem[i].n_pfns += n_pfns; 11962306a36Sopenharmony_ci break; 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci if (i == XEN_EXTRA_MEM_MAX_REGIONS) 12362306a36Sopenharmony_ci printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_cistatic void __init xen_del_extra_mem(unsigned long start_pfn, 12962306a36Sopenharmony_ci unsigned long n_pfns) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci int i; 13262306a36Sopenharmony_ci unsigned long start_r, size_r; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 13562306a36Sopenharmony_ci start_r = xen_extra_mem[i].start_pfn; 13662306a36Sopenharmony_ci size_r = xen_extra_mem[i].n_pfns; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* Start of region. */ 13962306a36Sopenharmony_ci if (start_r == start_pfn) { 14062306a36Sopenharmony_ci BUG_ON(n_pfns > size_r); 14162306a36Sopenharmony_ci xen_extra_mem[i].start_pfn += n_pfns; 14262306a36Sopenharmony_ci xen_extra_mem[i].n_pfns -= n_pfns; 14362306a36Sopenharmony_ci break; 14462306a36Sopenharmony_ci } 14562306a36Sopenharmony_ci /* End of region. 
*/ 14662306a36Sopenharmony_ci if (start_r + size_r == start_pfn + n_pfns) { 14762306a36Sopenharmony_ci BUG_ON(n_pfns > size_r); 14862306a36Sopenharmony_ci xen_extra_mem[i].n_pfns -= n_pfns; 14962306a36Sopenharmony_ci break; 15062306a36Sopenharmony_ci } 15162306a36Sopenharmony_ci /* Mid of region. */ 15262306a36Sopenharmony_ci if (start_pfn > start_r && start_pfn < start_r + size_r) { 15362306a36Sopenharmony_ci BUG_ON(start_pfn + n_pfns > start_r + size_r); 15462306a36Sopenharmony_ci xen_extra_mem[i].n_pfns = start_pfn - start_r; 15562306a36Sopenharmony_ci /* Calling memblock_reserve() again is okay. */ 15662306a36Sopenharmony_ci xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r - 15762306a36Sopenharmony_ci (start_pfn + n_pfns)); 15862306a36Sopenharmony_ci break; 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci } 16162306a36Sopenharmony_ci memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci/* 16562306a36Sopenharmony_ci * Called during boot before the p2m list can take entries beyond the 16662306a36Sopenharmony_ci * hypervisor supplied p2m list. Entries in extra mem are to be regarded as 16762306a36Sopenharmony_ci * invalid. 
16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ciunsigned long __ref xen_chk_extra_mem(unsigned long pfn) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci int i; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 17462306a36Sopenharmony_ci if (pfn >= xen_extra_mem[i].start_pfn && 17562306a36Sopenharmony_ci pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns) 17662306a36Sopenharmony_ci return INVALID_P2M_ENTRY; 17762306a36Sopenharmony_ci } 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci return IDENTITY_FRAME(pfn); 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci/* 18362306a36Sopenharmony_ci * Mark all pfns of extra mem as invalid in p2m list. 18462306a36Sopenharmony_ci */ 18562306a36Sopenharmony_civoid __init xen_inv_extra_mem(void) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci unsigned long pfn, pfn_s, pfn_e; 18862306a36Sopenharmony_ci int i; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 19162306a36Sopenharmony_ci if (!xen_extra_mem[i].n_pfns) 19262306a36Sopenharmony_ci continue; 19362306a36Sopenharmony_ci pfn_s = xen_extra_mem[i].start_pfn; 19462306a36Sopenharmony_ci pfn_e = pfn_s + xen_extra_mem[i].n_pfns; 19562306a36Sopenharmony_ci for (pfn = pfn_s; pfn < pfn_e; pfn++) 19662306a36Sopenharmony_ci set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci/* 20162306a36Sopenharmony_ci * Finds the next RAM pfn available in the E820 map after min_pfn. 20262306a36Sopenharmony_ci * This function updates min_pfn with the pfn found and returns 20362306a36Sopenharmony_ci * the size of that range or zero if not found. 
20462306a36Sopenharmony_ci */ 20562306a36Sopenharmony_cistatic unsigned long __init xen_find_pfn_range(unsigned long *min_pfn) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci const struct e820_entry *entry = xen_e820_table.entries; 20862306a36Sopenharmony_ci unsigned int i; 20962306a36Sopenharmony_ci unsigned long done = 0; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) { 21262306a36Sopenharmony_ci unsigned long s_pfn; 21362306a36Sopenharmony_ci unsigned long e_pfn; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (entry->type != E820_TYPE_RAM) 21662306a36Sopenharmony_ci continue; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci e_pfn = PFN_DOWN(entry->addr + entry->size); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci /* We only care about E820 after this */ 22162306a36Sopenharmony_ci if (e_pfn <= *min_pfn) 22262306a36Sopenharmony_ci continue; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci s_pfn = PFN_UP(entry->addr); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci /* If min_pfn falls within the E820 entry, we want to start 22762306a36Sopenharmony_ci * at the min_pfn PFN. 
22862306a36Sopenharmony_ci */ 22962306a36Sopenharmony_ci if (s_pfn <= *min_pfn) { 23062306a36Sopenharmony_ci done = e_pfn - *min_pfn; 23162306a36Sopenharmony_ci } else { 23262306a36Sopenharmony_ci done = e_pfn - s_pfn; 23362306a36Sopenharmony_ci *min_pfn = s_pfn; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci break; 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci return done; 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cistatic int __init xen_free_mfn(unsigned long mfn) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci struct xen_memory_reservation reservation = { 24462306a36Sopenharmony_ci .address_bits = 0, 24562306a36Sopenharmony_ci .extent_order = 0, 24662306a36Sopenharmony_ci .domid = DOMID_SELF 24762306a36Sopenharmony_ci }; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci set_xen_guest_handle(reservation.extent_start, &mfn); 25062306a36Sopenharmony_ci reservation.nr_extents = 1; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci/* 25662306a36Sopenharmony_ci * This releases a chunk of memory and then does the identity map. It's used 25762306a36Sopenharmony_ci * as a fallback if the remapping fails. 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_cistatic void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, 26062306a36Sopenharmony_ci unsigned long end_pfn, unsigned long nr_pages) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci unsigned long pfn, end; 26362306a36Sopenharmony_ci int ret; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci WARN_ON(start_pfn > end_pfn); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci /* Release pages first. 
*/ 26862306a36Sopenharmony_ci end = min(end_pfn, nr_pages); 26962306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end; pfn++) { 27062306a36Sopenharmony_ci unsigned long mfn = pfn_to_mfn(pfn); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* Make sure pfn exists to start with */ 27362306a36Sopenharmony_ci if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) 27462306a36Sopenharmony_ci continue; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci ret = xen_free_mfn(mfn); 27762306a36Sopenharmony_ci WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci if (ret == 1) { 28062306a36Sopenharmony_ci xen_released_pages++; 28162306a36Sopenharmony_ci if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) 28262306a36Sopenharmony_ci break; 28362306a36Sopenharmony_ci } else 28462306a36Sopenharmony_ci break; 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci set_phys_range_identity(start_pfn, end_pfn); 28862306a36Sopenharmony_ci} 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci/* 29162306a36Sopenharmony_ci * Helper function to update the p2m and m2p tables and kernel mapping. 
29262306a36Sopenharmony_ci */ 29362306a36Sopenharmony_cistatic void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci struct mmu_update update = { 29662306a36Sopenharmony_ci .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, 29762306a36Sopenharmony_ci .val = pfn 29862306a36Sopenharmony_ci }; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci /* Update p2m */ 30162306a36Sopenharmony_ci if (!set_phys_to_machine(pfn, mfn)) { 30262306a36Sopenharmony_ci WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", 30362306a36Sopenharmony_ci pfn, mfn); 30462306a36Sopenharmony_ci BUG(); 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci /* Update m2p */ 30862306a36Sopenharmony_ci if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { 30962306a36Sopenharmony_ci WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", 31062306a36Sopenharmony_ci mfn, pfn); 31162306a36Sopenharmony_ci BUG(); 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), 31562306a36Sopenharmony_ci mfn_pte(mfn, PAGE_KERNEL), 0)) { 31662306a36Sopenharmony_ci WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", 31762306a36Sopenharmony_ci mfn, pfn); 31862306a36Sopenharmony_ci BUG(); 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci/* 32362306a36Sopenharmony_ci * This function updates the p2m and m2p tables with an identity map from 32462306a36Sopenharmony_ci * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the 32562306a36Sopenharmony_ci * original allocation at remap_pfn. The information needed for remapping is 32662306a36Sopenharmony_ci * saved in the memory itself to avoid the need for allocating buffers. 
* The complete remap information is contained in a list of MFNs each
 * containing up to REMAP_SIZE MFNs and the start target PFN for doing the
 * remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 *
 * start_pfn: first pfn of the range to be identity mapped
 * size:      number of pfns in that range (a size of 0 triggers a warning)
 * remap_pfn: first pfn the underlying frames are to show up at later
 */
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	/* Remember the frame backing xen_remap_buf, it is restored below. */
	mfn_save = virt_to_mfn((void *)buf);

	/* Walk the range in steps of at most REMAP_SIZE pfns per record. */
	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/*
		 * Map first pfn to xen_remap_buf: from here on writes to
		 * xen_remap_buf land in the first frame of this chunk.
		 */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		/* Must be read before the identity map below replaces them. */
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] =
				pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list: this record is the new list head. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
38762306a36Sopenharmony_ci */ 38862306a36Sopenharmony_cistatic unsigned long __init xen_set_identity_and_remap_chunk( 38962306a36Sopenharmony_ci unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, 39062306a36Sopenharmony_ci unsigned long remap_pfn) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci unsigned long pfn; 39362306a36Sopenharmony_ci unsigned long i = 0; 39462306a36Sopenharmony_ci unsigned long n = end_pfn - start_pfn; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci if (remap_pfn == 0) 39762306a36Sopenharmony_ci remap_pfn = nr_pages; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci while (i < n) { 40062306a36Sopenharmony_ci unsigned long cur_pfn = start_pfn + i; 40162306a36Sopenharmony_ci unsigned long left = n - i; 40262306a36Sopenharmony_ci unsigned long size = left; 40362306a36Sopenharmony_ci unsigned long remap_range_size; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci /* Do not remap pages beyond the current allocation */ 40662306a36Sopenharmony_ci if (cur_pfn >= nr_pages) { 40762306a36Sopenharmony_ci /* Identity map remaining pages */ 40862306a36Sopenharmony_ci set_phys_range_identity(cur_pfn, cur_pfn + size); 40962306a36Sopenharmony_ci break; 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci if (cur_pfn + size > nr_pages) 41262306a36Sopenharmony_ci size = nr_pages - cur_pfn; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci remap_range_size = xen_find_pfn_range(&remap_pfn); 41562306a36Sopenharmony_ci if (!remap_range_size) { 41662306a36Sopenharmony_ci pr_warn("Unable to find available pfn range, not remapping identity pages\n"); 41762306a36Sopenharmony_ci xen_set_identity_and_release_chunk(cur_pfn, 41862306a36Sopenharmony_ci cur_pfn + left, nr_pages); 41962306a36Sopenharmony_ci break; 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci /* Adjust size to fit in current e820 RAM region */ 42262306a36Sopenharmony_ci if (size > remap_range_size) 42362306a36Sopenharmony_ci size = remap_range_size; 
42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci /* Update variables to reflect new mappings. */ 42862306a36Sopenharmony_ci i += size; 42962306a36Sopenharmony_ci remap_pfn += size; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci /* 43362306a36Sopenharmony_ci * If the PFNs are currently mapped, their VA mappings need to be 43462306a36Sopenharmony_ci * zapped. 43562306a36Sopenharmony_ci */ 43662306a36Sopenharmony_ci for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) 43762306a36Sopenharmony_ci (void)HYPERVISOR_update_va_mapping( 43862306a36Sopenharmony_ci (unsigned long)__va(pfn << PAGE_SHIFT), 43962306a36Sopenharmony_ci native_make_pte(0), 0); 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci return remap_pfn; 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistatic unsigned long __init xen_count_remap_pages( 44562306a36Sopenharmony_ci unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, 44662306a36Sopenharmony_ci unsigned long remap_pages) 44762306a36Sopenharmony_ci{ 44862306a36Sopenharmony_ci if (start_pfn >= nr_pages) 44962306a36Sopenharmony_ci return remap_pages; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci return remap_pages + min(end_pfn, nr_pages) - start_pfn; 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_cistatic unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, 45562306a36Sopenharmony_ci unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn, 45662306a36Sopenharmony_ci unsigned long nr_pages, unsigned long last_val)) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci phys_addr_t start = 0; 45962306a36Sopenharmony_ci unsigned long ret_val = 0; 46062306a36Sopenharmony_ci const struct e820_entry *entry = xen_e820_table.entries; 46162306a36Sopenharmony_ci int 
i; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* 46462306a36Sopenharmony_ci * Combine non-RAM regions and gaps until a RAM region (or the 46562306a36Sopenharmony_ci * end of the map) is reached, then call the provided function 46662306a36Sopenharmony_ci * to perform its duty on the non-RAM region. 46762306a36Sopenharmony_ci * 46862306a36Sopenharmony_ci * The combined non-RAM regions are rounded to a whole number 46962306a36Sopenharmony_ci * of pages so any partial pages are accessible via the 1:1 47062306a36Sopenharmony_ci * mapping. This is needed for some BIOSes that put (for 47162306a36Sopenharmony_ci * example) the DMI tables in a reserved region that begins on 47262306a36Sopenharmony_ci * a non-page boundary. 47362306a36Sopenharmony_ci */ 47462306a36Sopenharmony_ci for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) { 47562306a36Sopenharmony_ci phys_addr_t end = entry->addr + entry->size; 47662306a36Sopenharmony_ci if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) { 47762306a36Sopenharmony_ci unsigned long start_pfn = PFN_DOWN(start); 47862306a36Sopenharmony_ci unsigned long end_pfn = PFN_UP(end); 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci if (entry->type == E820_TYPE_RAM) 48162306a36Sopenharmony_ci end_pfn = PFN_UP(entry->addr); 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if (start_pfn < end_pfn) 48462306a36Sopenharmony_ci ret_val = func(start_pfn, end_pfn, nr_pages, 48562306a36Sopenharmony_ci ret_val); 48662306a36Sopenharmony_ci start = end; 48762306a36Sopenharmony_ci } 48862306a36Sopenharmony_ci } 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci return ret_val; 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci/* 49462306a36Sopenharmony_ci * Remap the memory prepared in xen_do_set_identity_and_remap_chunk(). 
49562306a36Sopenharmony_ci * The remap information (which mfn remap to which pfn) is contained in the 49662306a36Sopenharmony_ci * to be remapped memory itself in a linked list anchored at xen_remap_mfn. 49762306a36Sopenharmony_ci * This scheme allows to remap the different chunks in arbitrary order while 49862306a36Sopenharmony_ci * the resulting mapping will be independent from the order. 49962306a36Sopenharmony_ci */ 50062306a36Sopenharmony_civoid __init xen_remap_memory(void) 50162306a36Sopenharmony_ci{ 50262306a36Sopenharmony_ci unsigned long buf = (unsigned long)&xen_remap_buf; 50362306a36Sopenharmony_ci unsigned long mfn_save, pfn; 50462306a36Sopenharmony_ci unsigned long remapped = 0; 50562306a36Sopenharmony_ci unsigned int i; 50662306a36Sopenharmony_ci unsigned long pfn_s = ~0UL; 50762306a36Sopenharmony_ci unsigned long len = 0; 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci mfn_save = virt_to_mfn((void *)buf); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci while (xen_remap_mfn != INVALID_P2M_ENTRY) { 51262306a36Sopenharmony_ci /* Map the remap information */ 51362306a36Sopenharmony_ci set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci pfn = xen_remap_buf.target_pfn; 51862306a36Sopenharmony_ci for (i = 0; i < xen_remap_buf.size; i++) { 51962306a36Sopenharmony_ci xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]); 52062306a36Sopenharmony_ci remapped++; 52162306a36Sopenharmony_ci pfn++; 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci if (pfn_s == ~0UL || pfn == pfn_s) { 52462306a36Sopenharmony_ci pfn_s = xen_remap_buf.target_pfn; 52562306a36Sopenharmony_ci len += xen_remap_buf.size; 52662306a36Sopenharmony_ci } else if (pfn_s + len == xen_remap_buf.target_pfn) { 52762306a36Sopenharmony_ci len += xen_remap_buf.size; 52862306a36Sopenharmony_ci } else { 52962306a36Sopenharmony_ci 
xen_del_extra_mem(pfn_s, len); 53062306a36Sopenharmony_ci pfn_s = xen_remap_buf.target_pfn; 53162306a36Sopenharmony_ci len = xen_remap_buf.size; 53262306a36Sopenharmony_ci } 53362306a36Sopenharmony_ci xen_remap_mfn = xen_remap_buf.next_area_mfn; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci if (pfn_s != ~0UL && len) 53762306a36Sopenharmony_ci xen_del_extra_mem(pfn_s, len); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci set_pte_mfn(buf, mfn_save, PAGE_KERNEL); 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci pr_info("Remapped %ld page(s)\n", remapped); 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_cistatic unsigned long __init xen_get_pages_limit(void) 54562306a36Sopenharmony_ci{ 54662306a36Sopenharmony_ci unsigned long limit; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci limit = MAXMEM / PAGE_SIZE; 54962306a36Sopenharmony_ci if (!xen_initial_domain() && xen_512gb_limit) 55062306a36Sopenharmony_ci limit = GB(512) / PAGE_SIZE; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci return limit; 55362306a36Sopenharmony_ci} 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_cistatic unsigned long __init xen_get_max_pages(void) 55662306a36Sopenharmony_ci{ 55762306a36Sopenharmony_ci unsigned long max_pages, limit; 55862306a36Sopenharmony_ci domid_t domid = DOMID_SELF; 55962306a36Sopenharmony_ci long ret; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci limit = xen_get_pages_limit(); 56262306a36Sopenharmony_ci max_pages = limit; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci /* 56562306a36Sopenharmony_ci * For the initial domain we use the maximum reservation as 56662306a36Sopenharmony_ci * the maximum page. 56762306a36Sopenharmony_ci * 56862306a36Sopenharmony_ci * For guest domains the current maximum reservation reflects 56962306a36Sopenharmony_ci * the current maximum rather than the static maximum. 
In this 57062306a36Sopenharmony_ci * case the e820 map provided to us will cover the static 57162306a36Sopenharmony_ci * maximum region. 57262306a36Sopenharmony_ci */ 57362306a36Sopenharmony_ci if (xen_initial_domain()) { 57462306a36Sopenharmony_ci ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); 57562306a36Sopenharmony_ci if (ret > 0) 57662306a36Sopenharmony_ci max_pages = ret; 57762306a36Sopenharmony_ci } 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci return min(max_pages, limit); 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_cistatic void __init xen_align_and_add_e820_region(phys_addr_t start, 58362306a36Sopenharmony_ci phys_addr_t size, int type) 58462306a36Sopenharmony_ci{ 58562306a36Sopenharmony_ci phys_addr_t end = start + size; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci /* Align RAM regions to page boundaries. */ 58862306a36Sopenharmony_ci if (type == E820_TYPE_RAM) { 58962306a36Sopenharmony_ci start = PAGE_ALIGN(start); 59062306a36Sopenharmony_ci end &= ~((phys_addr_t)PAGE_SIZE - 1); 59162306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 59262306a36Sopenharmony_ci /* 59362306a36Sopenharmony_ci * Don't allow adding memory not in E820 map while booting the 59462306a36Sopenharmony_ci * system. Once the balloon driver is up it will remove that 59562306a36Sopenharmony_ci * restriction again. 
59662306a36Sopenharmony_ci */ 59762306a36Sopenharmony_ci max_mem_size = end; 59862306a36Sopenharmony_ci#endif 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci e820__range_add(start, end - start, type); 60262306a36Sopenharmony_ci} 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_cistatic void __init xen_ignore_unusable(void) 60562306a36Sopenharmony_ci{ 60662306a36Sopenharmony_ci struct e820_entry *entry = xen_e820_table.entries; 60762306a36Sopenharmony_ci unsigned int i; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) { 61062306a36Sopenharmony_ci if (entry->type == E820_TYPE_UNUSABLE) 61162306a36Sopenharmony_ci entry->type = E820_TYPE_RAM; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci} 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_cibool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size) 61662306a36Sopenharmony_ci{ 61762306a36Sopenharmony_ci struct e820_entry *entry; 61862306a36Sopenharmony_ci unsigned mapcnt; 61962306a36Sopenharmony_ci phys_addr_t end; 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci if (!size) 62262306a36Sopenharmony_ci return false; 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci end = start + size; 62562306a36Sopenharmony_ci entry = xen_e820_table.entries; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) { 62862306a36Sopenharmony_ci if (entry->type == E820_TYPE_RAM && entry->addr <= start && 62962306a36Sopenharmony_ci (entry->addr + entry->size) >= end) 63062306a36Sopenharmony_ci return false; 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci entry++; 63362306a36Sopenharmony_ci } 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci return true; 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci/* 63962306a36Sopenharmony_ci * Find a free area in physical memory not yet reserved and compliant with 
64062306a36Sopenharmony_ci * E820 map. 64162306a36Sopenharmony_ci * Used to relocate pre-allocated areas like initrd or p2m list which are in 64262306a36Sopenharmony_ci * conflict with the to be used E820 map. 64362306a36Sopenharmony_ci * In case no area is found, return 0. Otherwise return the physical address 64462306a36Sopenharmony_ci * of the area which is already reserved for convenience. 64562306a36Sopenharmony_ci */ 64662306a36Sopenharmony_ciphys_addr_t __init xen_find_free_area(phys_addr_t size) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci unsigned mapcnt; 64962306a36Sopenharmony_ci phys_addr_t addr, start; 65062306a36Sopenharmony_ci struct e820_entry *entry = xen_e820_table.entries; 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) { 65362306a36Sopenharmony_ci if (entry->type != E820_TYPE_RAM || entry->size < size) 65462306a36Sopenharmony_ci continue; 65562306a36Sopenharmony_ci start = entry->addr; 65662306a36Sopenharmony_ci for (addr = start; addr < start + size; addr += PAGE_SIZE) { 65762306a36Sopenharmony_ci if (!memblock_is_reserved(addr)) 65862306a36Sopenharmony_ci continue; 65962306a36Sopenharmony_ci start = addr + PAGE_SIZE; 66062306a36Sopenharmony_ci if (start + size > entry->addr + entry->size) 66162306a36Sopenharmony_ci break; 66262306a36Sopenharmony_ci } 66362306a36Sopenharmony_ci if (addr >= start + size) { 66462306a36Sopenharmony_ci memblock_reserve(start, size); 66562306a36Sopenharmony_ci return start; 66662306a36Sopenharmony_ci } 66762306a36Sopenharmony_ci } 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci return 0; 67062306a36Sopenharmony_ci} 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci/* 67362306a36Sopenharmony_ci * Like memcpy, but with physical addresses for dest and src. 
67462306a36Sopenharmony_ci */ 67562306a36Sopenharmony_cistatic void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src, 67662306a36Sopenharmony_ci phys_addr_t n) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci phys_addr_t dest_off, src_off, dest_len, src_len, len; 67962306a36Sopenharmony_ci void *from, *to; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci while (n) { 68262306a36Sopenharmony_ci dest_off = dest & ~PAGE_MASK; 68362306a36Sopenharmony_ci src_off = src & ~PAGE_MASK; 68462306a36Sopenharmony_ci dest_len = n; 68562306a36Sopenharmony_ci if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off) 68662306a36Sopenharmony_ci dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off; 68762306a36Sopenharmony_ci src_len = n; 68862306a36Sopenharmony_ci if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off) 68962306a36Sopenharmony_ci src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off; 69062306a36Sopenharmony_ci len = min(dest_len, src_len); 69162306a36Sopenharmony_ci to = early_memremap(dest - dest_off, dest_len + dest_off); 69262306a36Sopenharmony_ci from = early_memremap(src - src_off, src_len + src_off); 69362306a36Sopenharmony_ci memcpy(to, from, len); 69462306a36Sopenharmony_ci early_memunmap(to, dest_len + dest_off); 69562306a36Sopenharmony_ci early_memunmap(from, src_len + src_off); 69662306a36Sopenharmony_ci n -= len; 69762306a36Sopenharmony_ci dest += len; 69862306a36Sopenharmony_ci src += len; 69962306a36Sopenharmony_ci } 70062306a36Sopenharmony_ci} 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci/* 70362306a36Sopenharmony_ci * Reserve Xen mfn_list. 
70462306a36Sopenharmony_ci */ 70562306a36Sopenharmony_cistatic void __init xen_reserve_xen_mfnlist(void) 70662306a36Sopenharmony_ci{ 70762306a36Sopenharmony_ci phys_addr_t start, size; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci if (xen_start_info->mfn_list >= __START_KERNEL_map) { 71062306a36Sopenharmony_ci start = __pa(xen_start_info->mfn_list); 71162306a36Sopenharmony_ci size = PFN_ALIGN(xen_start_info->nr_pages * 71262306a36Sopenharmony_ci sizeof(unsigned long)); 71362306a36Sopenharmony_ci } else { 71462306a36Sopenharmony_ci start = PFN_PHYS(xen_start_info->first_p2m_pfn); 71562306a36Sopenharmony_ci size = PFN_PHYS(xen_start_info->nr_p2m_frames); 71662306a36Sopenharmony_ci } 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci memblock_reserve(start, size); 71962306a36Sopenharmony_ci if (!xen_is_e820_reserved(start, size)) 72062306a36Sopenharmony_ci return; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci xen_relocate_p2m(); 72362306a36Sopenharmony_ci memblock_phys_free(start, size); 72462306a36Sopenharmony_ci} 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci/** 72762306a36Sopenharmony_ci * xen_memory_setup - Hook for machine specific memory setup. 
72862306a36Sopenharmony_ci **/ 72962306a36Sopenharmony_cichar * __init xen_memory_setup(void) 73062306a36Sopenharmony_ci{ 73162306a36Sopenharmony_ci unsigned long max_pfn, pfn_s, n_pfns; 73262306a36Sopenharmony_ci phys_addr_t mem_end, addr, size, chunk_size; 73362306a36Sopenharmony_ci u32 type; 73462306a36Sopenharmony_ci int rc; 73562306a36Sopenharmony_ci struct xen_memory_map memmap; 73662306a36Sopenharmony_ci unsigned long max_pages; 73762306a36Sopenharmony_ci unsigned long extra_pages = 0; 73862306a36Sopenharmony_ci int i; 73962306a36Sopenharmony_ci int op; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci xen_parse_512gb(); 74262306a36Sopenharmony_ci max_pfn = xen_get_pages_limit(); 74362306a36Sopenharmony_ci max_pfn = min(max_pfn, xen_start_info->nr_pages); 74462306a36Sopenharmony_ci mem_end = PFN_PHYS(max_pfn); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries); 74762306a36Sopenharmony_ci set_xen_guest_handle(memmap.buffer, xen_e820_table.entries); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) 75062306a36Sopenharmony_ci xen_saved_max_mem_size = max_mem_size; 75162306a36Sopenharmony_ci#endif 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci op = xen_initial_domain() ? 75462306a36Sopenharmony_ci XENMEM_machine_memory_map : 75562306a36Sopenharmony_ci XENMEM_memory_map; 75662306a36Sopenharmony_ci rc = HYPERVISOR_memory_op(op, &memmap); 75762306a36Sopenharmony_ci if (rc == -ENOSYS) { 75862306a36Sopenharmony_ci BUG_ON(xen_initial_domain()); 75962306a36Sopenharmony_ci memmap.nr_entries = 1; 76062306a36Sopenharmony_ci xen_e820_table.entries[0].addr = 0ULL; 76162306a36Sopenharmony_ci xen_e820_table.entries[0].size = mem_end; 76262306a36Sopenharmony_ci /* 8MB slack (to balance backend allocations). 
*/ 76362306a36Sopenharmony_ci xen_e820_table.entries[0].size += 8ULL << 20; 76462306a36Sopenharmony_ci xen_e820_table.entries[0].type = E820_TYPE_RAM; 76562306a36Sopenharmony_ci rc = 0; 76662306a36Sopenharmony_ci } 76762306a36Sopenharmony_ci BUG_ON(rc); 76862306a36Sopenharmony_ci BUG_ON(memmap.nr_entries == 0); 76962306a36Sopenharmony_ci xen_e820_table.nr_entries = memmap.nr_entries; 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci if (xen_initial_domain()) { 77262306a36Sopenharmony_ci /* 77362306a36Sopenharmony_ci * Xen won't allow a 1:1 mapping to be created to UNUSABLE 77462306a36Sopenharmony_ci * regions, so if we're using the machine memory map leave the 77562306a36Sopenharmony_ci * region as RAM as it is in the pseudo-physical map. 77662306a36Sopenharmony_ci * 77762306a36Sopenharmony_ci * UNUSABLE regions in domUs are not handled and will need 77862306a36Sopenharmony_ci * a patch in the future. 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_ci xen_ignore_unusable(); 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci#ifdef CONFIG_ISCSI_IBFT_FIND 78362306a36Sopenharmony_ci /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */ 78462306a36Sopenharmony_ci xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START; 78562306a36Sopenharmony_ci xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START; 78662306a36Sopenharmony_ci xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED; 78762306a36Sopenharmony_ci xen_e820_table.nr_entries++; 78862306a36Sopenharmony_ci#endif 78962306a36Sopenharmony_ci } 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci /* Make sure the Xen-supplied memory map is well-ordered. */ 79262306a36Sopenharmony_ci e820__update_table(&xen_e820_table); 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci max_pages = xen_get_max_pages(); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci /* How many extra pages do we need due to remapping? 
*/ 79762306a36Sopenharmony_ci max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages); 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (max_pages > max_pfn) 80062306a36Sopenharmony_ci extra_pages += max_pages - max_pfn; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci /* 80362306a36Sopenharmony_ci * Clamp the amount of extra memory to a EXTRA_MEM_RATIO 80462306a36Sopenharmony_ci * factor the base size. 80562306a36Sopenharmony_ci * 80662306a36Sopenharmony_ci * Make sure we have no memory above max_pages, as this area 80762306a36Sopenharmony_ci * isn't handled by the p2m management. 80862306a36Sopenharmony_ci */ 80962306a36Sopenharmony_ci extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), 81062306a36Sopenharmony_ci extra_pages, max_pages - max_pfn); 81162306a36Sopenharmony_ci i = 0; 81262306a36Sopenharmony_ci addr = xen_e820_table.entries[0].addr; 81362306a36Sopenharmony_ci size = xen_e820_table.entries[0].size; 81462306a36Sopenharmony_ci while (i < xen_e820_table.nr_entries) { 81562306a36Sopenharmony_ci bool discard = false; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci chunk_size = size; 81862306a36Sopenharmony_ci type = xen_e820_table.entries[i].type; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (type == E820_TYPE_RESERVED) 82162306a36Sopenharmony_ci xen_pv_pci_possible = true; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci if (type == E820_TYPE_RAM) { 82462306a36Sopenharmony_ci if (addr < mem_end) { 82562306a36Sopenharmony_ci chunk_size = min(size, mem_end - addr); 82662306a36Sopenharmony_ci } else if (extra_pages) { 82762306a36Sopenharmony_ci chunk_size = min(size, PFN_PHYS(extra_pages)); 82862306a36Sopenharmony_ci pfn_s = PFN_UP(addr); 82962306a36Sopenharmony_ci n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s; 83062306a36Sopenharmony_ci extra_pages -= n_pfns; 83162306a36Sopenharmony_ci xen_add_extra_mem(pfn_s, n_pfns); 83262306a36Sopenharmony_ci xen_max_p2m_pfn = pfn_s + n_pfns; 
83362306a36Sopenharmony_ci } else 83462306a36Sopenharmony_ci discard = true; 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci if (!discard) 83862306a36Sopenharmony_ci xen_align_and_add_e820_region(addr, chunk_size, type); 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci addr += chunk_size; 84162306a36Sopenharmony_ci size -= chunk_size; 84262306a36Sopenharmony_ci if (size == 0) { 84362306a36Sopenharmony_ci i++; 84462306a36Sopenharmony_ci if (i < xen_e820_table.nr_entries) { 84562306a36Sopenharmony_ci addr = xen_e820_table.entries[i].addr; 84662306a36Sopenharmony_ci size = xen_e820_table.entries[i].size; 84762306a36Sopenharmony_ci } 84862306a36Sopenharmony_ci } 84962306a36Sopenharmony_ci } 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ci /* 85262306a36Sopenharmony_ci * Set the rest as identity mapped, in case PCI BARs are 85362306a36Sopenharmony_ci * located here. 85462306a36Sopenharmony_ci */ 85562306a36Sopenharmony_ci set_phys_range_identity(addr / PAGE_SIZE, ~0ul); 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci /* 85862306a36Sopenharmony_ci * In domU, the ISA region is normal, usable memory, but we 85962306a36Sopenharmony_ci * reserve ISA memory anyway because too many things poke 86062306a36Sopenharmony_ci * about in there. 86162306a36Sopenharmony_ci */ 86262306a36Sopenharmony_ci e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_TYPE_RESERVED); 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci e820__update_table(e820_table); 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci /* 86762306a36Sopenharmony_ci * Check whether the kernel itself conflicts with the target E820 map. 86862306a36Sopenharmony_ci * Failing now is better than running into weird problems later due 86962306a36Sopenharmony_ci * to relocating (and even reusing) pages with kernel text or data. 
87062306a36Sopenharmony_ci */ 87162306a36Sopenharmony_ci if (xen_is_e820_reserved(__pa_symbol(_text), 87262306a36Sopenharmony_ci __pa_symbol(__bss_stop) - __pa_symbol(_text))) { 87362306a36Sopenharmony_ci xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n"); 87462306a36Sopenharmony_ci BUG(); 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci /* 87862306a36Sopenharmony_ci * Check for a conflict of the hypervisor supplied page tables with 87962306a36Sopenharmony_ci * the target E820 map. 88062306a36Sopenharmony_ci */ 88162306a36Sopenharmony_ci xen_pt_check_e820(); 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci xen_reserve_xen_mfnlist(); 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci /* Check for a conflict of the initrd with the target E820 map. */ 88662306a36Sopenharmony_ci if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image, 88762306a36Sopenharmony_ci boot_params.hdr.ramdisk_size)) { 88862306a36Sopenharmony_ci phys_addr_t new_area, start, size; 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci new_area = xen_find_free_area(boot_params.hdr.ramdisk_size); 89162306a36Sopenharmony_ci if (!new_area) { 89262306a36Sopenharmony_ci xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n"); 89362306a36Sopenharmony_ci BUG(); 89462306a36Sopenharmony_ci } 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci start = boot_params.hdr.ramdisk_image; 89762306a36Sopenharmony_ci size = boot_params.hdr.ramdisk_size; 89862306a36Sopenharmony_ci xen_phys_memcpy(new_area, start, size); 89962306a36Sopenharmony_ci pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n", 90062306a36Sopenharmony_ci start, start + size, new_area, new_area + size); 90162306a36Sopenharmony_ci memblock_phys_free(start, size); 90262306a36Sopenharmony_ci boot_params.hdr.ramdisk_image = new_area; 90362306a36Sopenharmony_ci boot_params.ext_ramdisk_image = new_area 
>> 32; 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci /* 90762306a36Sopenharmony_ci * Set identity map on non-RAM pages and prepare remapping the 90862306a36Sopenharmony_ci * underlying RAM. 90962306a36Sopenharmony_ci */ 91062306a36Sopenharmony_ci xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci pr_info("Released %ld page(s)\n", xen_released_pages); 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci return "Xen"; 91562306a36Sopenharmony_ci} 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_cistatic int register_callback(unsigned type, const void *func) 91862306a36Sopenharmony_ci{ 91962306a36Sopenharmony_ci struct callback_register callback = { 92062306a36Sopenharmony_ci .type = type, 92162306a36Sopenharmony_ci .address = XEN_CALLBACK(__KERNEL_CS, func), 92262306a36Sopenharmony_ci .flags = CALLBACKF_mask_events, 92362306a36Sopenharmony_ci }; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); 92662306a36Sopenharmony_ci} 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_civoid xen_enable_sysenter(void) 92962306a36Sopenharmony_ci{ 93062306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && 93162306a36Sopenharmony_ci register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) 93262306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); 93362306a36Sopenharmony_ci} 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_civoid xen_enable_syscall(void) 93662306a36Sopenharmony_ci{ 93762306a36Sopenharmony_ci int ret; 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); 94062306a36Sopenharmony_ci if (ret != 0) { 94162306a36Sopenharmony_ci printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); 94262306a36Sopenharmony_ci /* Pretty fatal; 64-bit userspace has no other 94362306a36Sopenharmony_ci 
mechanism for syscalls. */ 94462306a36Sopenharmony_ci } 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && 94762306a36Sopenharmony_ci register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) 94862306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); 94962306a36Sopenharmony_ci} 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_cistatic void __init xen_pvmmu_arch_setup(void) 95262306a36Sopenharmony_ci{ 95362306a36Sopenharmony_ci HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci if (register_callback(CALLBACKTYPE_event, 95662306a36Sopenharmony_ci xen_asm_exc_xen_hypervisor_callback) || 95762306a36Sopenharmony_ci register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 95862306a36Sopenharmony_ci BUG(); 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci xen_enable_sysenter(); 96162306a36Sopenharmony_ci xen_enable_syscall(); 96262306a36Sopenharmony_ci} 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci/* This function is not called for HVM domains */ 96562306a36Sopenharmony_civoid __init xen_arch_setup(void) 96662306a36Sopenharmony_ci{ 96762306a36Sopenharmony_ci xen_panic_handler_init(); 96862306a36Sopenharmony_ci xen_pvmmu_arch_setup(); 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci#ifdef CONFIG_ACPI 97162306a36Sopenharmony_ci if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 97262306a36Sopenharmony_ci printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 97362306a36Sopenharmony_ci disable_acpi(); 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci#endif 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci memcpy(boot_command_line, xen_start_info->cmd_line, 97862306a36Sopenharmony_ci MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? 
97962306a36Sopenharmony_ci COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci /* Set up idle, making sure it calls safe_halt() pvop */ 98262306a36Sopenharmony_ci disable_cpuidle(); 98362306a36Sopenharmony_ci disable_cpufreq(); 98462306a36Sopenharmony_ci WARN_ON(xen_set_default_idle()); 98562306a36Sopenharmony_ci#ifdef CONFIG_NUMA 98662306a36Sopenharmony_ci numa_off = 1; 98762306a36Sopenharmony_ci#endif 98862306a36Sopenharmony_ci} 989