162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Machine specific setup for xen
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/init.h>
962306a36Sopenharmony_ci#include <linux/iscsi_ibft.h>
1062306a36Sopenharmony_ci#include <linux/sched.h>
1162306a36Sopenharmony_ci#include <linux/kstrtox.h>
1262306a36Sopenharmony_ci#include <linux/mm.h>
1362306a36Sopenharmony_ci#include <linux/pm.h>
1462306a36Sopenharmony_ci#include <linux/memblock.h>
1562306a36Sopenharmony_ci#include <linux/cpuidle.h>
1662306a36Sopenharmony_ci#include <linux/cpufreq.h>
1762306a36Sopenharmony_ci#include <linux/memory_hotplug.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <asm/elf.h>
2062306a36Sopenharmony_ci#include <asm/vdso.h>
2162306a36Sopenharmony_ci#include <asm/e820/api.h>
2262306a36Sopenharmony_ci#include <asm/setup.h>
2362306a36Sopenharmony_ci#include <asm/acpi.h>
2462306a36Sopenharmony_ci#include <asm/numa.h>
2562306a36Sopenharmony_ci#include <asm/idtentry.h>
2662306a36Sopenharmony_ci#include <asm/xen/hypervisor.h>
2762306a36Sopenharmony_ci#include <asm/xen/hypercall.h>
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#include <xen/xen.h>
3062306a36Sopenharmony_ci#include <xen/page.h>
3162306a36Sopenharmony_ci#include <xen/interface/callback.h>
3262306a36Sopenharmony_ci#include <xen/interface/memory.h>
3362306a36Sopenharmony_ci#include <xen/interface/physdev.h>
3462306a36Sopenharmony_ci#include <xen/features.h>
3562306a36Sopenharmony_ci#include <xen/hvc-console.h>
3662306a36Sopenharmony_ci#include "xen-ops.h"
3762306a36Sopenharmony_ci#include "mmu.h"
3862306a36Sopenharmony_ci
/* Convert a size given in GiB to bytes; computed in 64 bits to avoid overflow. */
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
4062306a36Sopenharmony_ci
/*
 * Amount of extra memory space we add to the e820 ranges.
 * Each slot describes one PFN range; a slot with n_pfns == 0 is unused.
 */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/*
 * Number of pages released from the initial allocation.
 * Incremented for every page successfully handed back to Xen in
 * xen_set_identity_and_release_chunk().
 */
unsigned long xen_released_pages;

/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;
5262306a36Sopenharmony_ci
/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 *
 * REMAP_SIZE is P2M_PER_PAGE minus the three header words that precede the
 * mfns[] array (presumably so that the whole structure fills exactly one
 * page - confirm against the definition of P2M_PER_PAGE).
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	/* MFN of the next remap info page, INVALID_P2M_ENTRY ends the list. */
	unsigned long	next_area_mfn;
	/* First PFN the frames listed in mfns[] are to be remapped to. */
	unsigned long	target_pfn;
	/* Number of valid entries in mfns[]. */
	unsigned long	size;
	/* The frames to remap; mfns[0] is the frame holding this info page. */
	unsigned long	mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
/* Head of the list of remap info pages, INVALID_P2M_ENTRY if the list is empty. */
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
6662306a36Sopenharmony_ci
/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)

/*
 * Whether the memory of a non-initial domain is capped at 512GB, see
 * xen_get_pages_limit(). The default comes from CONFIG_XEN_512GB and can be
 * overridden via the "xen_512gb_limit" option handled by xen_parse_512gb().
 */
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic void __init xen_parse_512gb(void)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	bool val = false;
8462306a36Sopenharmony_ci	char *arg;
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
8762306a36Sopenharmony_ci	if (!arg)
8862306a36Sopenharmony_ci		return;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
9162306a36Sopenharmony_ci	if (!arg)
9262306a36Sopenharmony_ci		val = true;
9362306a36Sopenharmony_ci	else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val))
9462306a36Sopenharmony_ci		return;
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	xen_512gb_limit = val;
9762306a36Sopenharmony_ci}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_cistatic void __init xen_add_extra_mem(unsigned long start_pfn,
10062306a36Sopenharmony_ci				     unsigned long n_pfns)
10162306a36Sopenharmony_ci{
10262306a36Sopenharmony_ci	int i;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	/*
10562306a36Sopenharmony_ci	 * No need to check for zero size, should happen rarely and will only
10662306a36Sopenharmony_ci	 * write a new entry regarded to be unused due to zero size.
10762306a36Sopenharmony_ci	 */
10862306a36Sopenharmony_ci	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
10962306a36Sopenharmony_ci		/* Add new region. */
11062306a36Sopenharmony_ci		if (xen_extra_mem[i].n_pfns == 0) {
11162306a36Sopenharmony_ci			xen_extra_mem[i].start_pfn = start_pfn;
11262306a36Sopenharmony_ci			xen_extra_mem[i].n_pfns = n_pfns;
11362306a36Sopenharmony_ci			break;
11462306a36Sopenharmony_ci		}
11562306a36Sopenharmony_ci		/* Append to existing region. */
11662306a36Sopenharmony_ci		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
11762306a36Sopenharmony_ci		    start_pfn) {
11862306a36Sopenharmony_ci			xen_extra_mem[i].n_pfns += n_pfns;
11962306a36Sopenharmony_ci			break;
12062306a36Sopenharmony_ci		}
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
12362306a36Sopenharmony_ci		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
12662306a36Sopenharmony_ci}
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_cistatic void __init xen_del_extra_mem(unsigned long start_pfn,
12962306a36Sopenharmony_ci				     unsigned long n_pfns)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	int i;
13262306a36Sopenharmony_ci	unsigned long start_r, size_r;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
13562306a36Sopenharmony_ci		start_r = xen_extra_mem[i].start_pfn;
13662306a36Sopenharmony_ci		size_r = xen_extra_mem[i].n_pfns;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci		/* Start of region. */
13962306a36Sopenharmony_ci		if (start_r == start_pfn) {
14062306a36Sopenharmony_ci			BUG_ON(n_pfns > size_r);
14162306a36Sopenharmony_ci			xen_extra_mem[i].start_pfn += n_pfns;
14262306a36Sopenharmony_ci			xen_extra_mem[i].n_pfns -= n_pfns;
14362306a36Sopenharmony_ci			break;
14462306a36Sopenharmony_ci		}
14562306a36Sopenharmony_ci		/* End of region. */
14662306a36Sopenharmony_ci		if (start_r + size_r == start_pfn + n_pfns) {
14762306a36Sopenharmony_ci			BUG_ON(n_pfns > size_r);
14862306a36Sopenharmony_ci			xen_extra_mem[i].n_pfns -= n_pfns;
14962306a36Sopenharmony_ci			break;
15062306a36Sopenharmony_ci		}
15162306a36Sopenharmony_ci		/* Mid of region. */
15262306a36Sopenharmony_ci		if (start_pfn > start_r && start_pfn < start_r + size_r) {
15362306a36Sopenharmony_ci			BUG_ON(start_pfn + n_pfns > start_r + size_r);
15462306a36Sopenharmony_ci			xen_extra_mem[i].n_pfns = start_pfn - start_r;
15562306a36Sopenharmony_ci			/* Calling memblock_reserve() again is okay. */
15662306a36Sopenharmony_ci			xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
15762306a36Sopenharmony_ci					  (start_pfn + n_pfns));
15862306a36Sopenharmony_ci			break;
15962306a36Sopenharmony_ci		}
16062306a36Sopenharmony_ci	}
16162306a36Sopenharmony_ci	memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
16262306a36Sopenharmony_ci}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci/*
16562306a36Sopenharmony_ci * Called during boot before the p2m list can take entries beyond the
16662306a36Sopenharmony_ci * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
16762306a36Sopenharmony_ci * invalid.
16862306a36Sopenharmony_ci */
16962306a36Sopenharmony_ciunsigned long __ref xen_chk_extra_mem(unsigned long pfn)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	int i;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
17462306a36Sopenharmony_ci		if (pfn >= xen_extra_mem[i].start_pfn &&
17562306a36Sopenharmony_ci		    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
17662306a36Sopenharmony_ci			return INVALID_P2M_ENTRY;
17762306a36Sopenharmony_ci	}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	return IDENTITY_FRAME(pfn);
18062306a36Sopenharmony_ci}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci/*
18362306a36Sopenharmony_ci * Mark all pfns of extra mem as invalid in p2m list.
18462306a36Sopenharmony_ci */
18562306a36Sopenharmony_civoid __init xen_inv_extra_mem(void)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	unsigned long pfn, pfn_s, pfn_e;
18862306a36Sopenharmony_ci	int i;
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
19162306a36Sopenharmony_ci		if (!xen_extra_mem[i].n_pfns)
19262306a36Sopenharmony_ci			continue;
19362306a36Sopenharmony_ci		pfn_s = xen_extra_mem[i].start_pfn;
19462306a36Sopenharmony_ci		pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
19562306a36Sopenharmony_ci		for (pfn = pfn_s; pfn < pfn_e; pfn++)
19662306a36Sopenharmony_ci			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
19762306a36Sopenharmony_ci	}
19862306a36Sopenharmony_ci}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci/*
20162306a36Sopenharmony_ci * Finds the next RAM pfn available in the E820 map after min_pfn.
20262306a36Sopenharmony_ci * This function updates min_pfn with the pfn found and returns
20362306a36Sopenharmony_ci * the size of that range or zero if not found.
20462306a36Sopenharmony_ci */
20562306a36Sopenharmony_cistatic unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci	const struct e820_entry *entry = xen_e820_table.entries;
20862306a36Sopenharmony_ci	unsigned int i;
20962306a36Sopenharmony_ci	unsigned long done = 0;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
21262306a36Sopenharmony_ci		unsigned long s_pfn;
21362306a36Sopenharmony_ci		unsigned long e_pfn;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci		if (entry->type != E820_TYPE_RAM)
21662306a36Sopenharmony_ci			continue;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci		e_pfn = PFN_DOWN(entry->addr + entry->size);
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci		/* We only care about E820 after this */
22162306a36Sopenharmony_ci		if (e_pfn <= *min_pfn)
22262306a36Sopenharmony_ci			continue;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci		s_pfn = PFN_UP(entry->addr);
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci		/* If min_pfn falls within the E820 entry, we want to start
22762306a36Sopenharmony_ci		 * at the min_pfn PFN.
22862306a36Sopenharmony_ci		 */
22962306a36Sopenharmony_ci		if (s_pfn <= *min_pfn) {
23062306a36Sopenharmony_ci			done = e_pfn - *min_pfn;
23162306a36Sopenharmony_ci		} else {
23262306a36Sopenharmony_ci			done = e_pfn - s_pfn;
23362306a36Sopenharmony_ci			*min_pfn = s_pfn;
23462306a36Sopenharmony_ci		}
23562306a36Sopenharmony_ci		break;
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	return done;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_cistatic int __init xen_free_mfn(unsigned long mfn)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	struct xen_memory_reservation reservation = {
24462306a36Sopenharmony_ci		.address_bits = 0,
24562306a36Sopenharmony_ci		.extent_order = 0,
24662306a36Sopenharmony_ci		.domid        = DOMID_SELF
24762306a36Sopenharmony_ci	};
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	set_xen_guest_handle(reservation.extent_start, &mfn);
25062306a36Sopenharmony_ci	reservation.nr_extents = 1;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci/*
25662306a36Sopenharmony_ci * This releases a chunk of memory and then does the identity map. It's used
25762306a36Sopenharmony_ci * as a fallback if the remapping fails.
25862306a36Sopenharmony_ci */
25962306a36Sopenharmony_cistatic void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
26062306a36Sopenharmony_ci			unsigned long end_pfn, unsigned long nr_pages)
26162306a36Sopenharmony_ci{
26262306a36Sopenharmony_ci	unsigned long pfn, end;
26362306a36Sopenharmony_ci	int ret;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	WARN_ON(start_pfn > end_pfn);
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	/* Release pages first. */
26862306a36Sopenharmony_ci	end = min(end_pfn, nr_pages);
26962306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < end; pfn++) {
27062306a36Sopenharmony_ci		unsigned long mfn = pfn_to_mfn(pfn);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci		/* Make sure pfn exists to start with */
27362306a36Sopenharmony_ci		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
27462306a36Sopenharmony_ci			continue;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci		ret = xen_free_mfn(mfn);
27762306a36Sopenharmony_ci		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci		if (ret == 1) {
28062306a36Sopenharmony_ci			xen_released_pages++;
28162306a36Sopenharmony_ci			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
28262306a36Sopenharmony_ci				break;
28362306a36Sopenharmony_ci		} else
28462306a36Sopenharmony_ci			break;
28562306a36Sopenharmony_ci	}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	set_phys_range_identity(start_pfn, end_pfn);
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci/*
29162306a36Sopenharmony_ci * Helper function to update the p2m and m2p tables and kernel mapping.
29262306a36Sopenharmony_ci */
29362306a36Sopenharmony_cistatic void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	struct mmu_update update = {
29662306a36Sopenharmony_ci		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
29762306a36Sopenharmony_ci		.val = pfn
29862306a36Sopenharmony_ci	};
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	/* Update p2m */
30162306a36Sopenharmony_ci	if (!set_phys_to_machine(pfn, mfn)) {
30262306a36Sopenharmony_ci		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
30362306a36Sopenharmony_ci		     pfn, mfn);
30462306a36Sopenharmony_ci		BUG();
30562306a36Sopenharmony_ci	}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	/* Update m2p */
30862306a36Sopenharmony_ci	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
30962306a36Sopenharmony_ci		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
31062306a36Sopenharmony_ci		     mfn, pfn);
31162306a36Sopenharmony_ci		BUG();
31262306a36Sopenharmony_ci	}
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
31562306a36Sopenharmony_ci					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
31662306a36Sopenharmony_ci		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
31762306a36Sopenharmony_ci		      mfn, pfn);
31862306a36Sopenharmony_ci		BUG();
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci/*
32362306a36Sopenharmony_ci * This function updates the p2m and m2p tables with an identity map from
32462306a36Sopenharmony_ci * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
32562306a36Sopenharmony_ci * original allocation at remap_pfn. The information needed for remapping is
32662306a36Sopenharmony_ci * saved in the memory itself to avoid the need for allocating buffers. The
32762306a36Sopenharmony_ci * complete remap information is contained in a list of MFNs each containing
32862306a36Sopenharmony_ci * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
32962306a36Sopenharmony_ci * This enables us to preserve the original mfn sequence while doing the
33062306a36Sopenharmony_ci * remapping at a time when the memory management is capable of allocating
33162306a36Sopenharmony_ci * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
33262306a36Sopenharmony_ci * its callers.
33362306a36Sopenharmony_ci */
33462306a36Sopenharmony_cistatic void __init xen_do_set_identity_and_remap_chunk(
33562306a36Sopenharmony_ci        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
33662306a36Sopenharmony_ci{
33762306a36Sopenharmony_ci	unsigned long buf = (unsigned long)&xen_remap_buf;
33862306a36Sopenharmony_ci	unsigned long mfn_save, mfn;
33962306a36Sopenharmony_ci	unsigned long ident_pfn_iter, remap_pfn_iter;
34062306a36Sopenharmony_ci	unsigned long ident_end_pfn = start_pfn + size;
34162306a36Sopenharmony_ci	unsigned long left = size;
34262306a36Sopenharmony_ci	unsigned int i, chunk;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	WARN_ON(size == 0);
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	mfn_save = virt_to_mfn((void *)buf);
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
34962306a36Sopenharmony_ci	     ident_pfn_iter < ident_end_pfn;
35062306a36Sopenharmony_ci	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
35162306a36Sopenharmony_ci		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci		/* Map first pfn to xen_remap_buf */
35462306a36Sopenharmony_ci		mfn = pfn_to_mfn(ident_pfn_iter);
35562306a36Sopenharmony_ci		set_pte_mfn(buf, mfn, PAGE_KERNEL);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci		/* Save mapping information in page */
35862306a36Sopenharmony_ci		xen_remap_buf.next_area_mfn = xen_remap_mfn;
35962306a36Sopenharmony_ci		xen_remap_buf.target_pfn = remap_pfn_iter;
36062306a36Sopenharmony_ci		xen_remap_buf.size = chunk;
36162306a36Sopenharmony_ci		for (i = 0; i < chunk; i++)
36262306a36Sopenharmony_ci			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		/* Put remap buf into list. */
36562306a36Sopenharmony_ci		xen_remap_mfn = mfn;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci		/* Set identity map */
36862306a36Sopenharmony_ci		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci		left -= chunk;
37162306a36Sopenharmony_ci	}
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	/* Restore old xen_remap_buf mapping */
37462306a36Sopenharmony_ci	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci/*
37862306a36Sopenharmony_ci * This function takes a contiguous pfn range that needs to be identity mapped
37962306a36Sopenharmony_ci * and:
38062306a36Sopenharmony_ci *
38162306a36Sopenharmony_ci *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
38262306a36Sopenharmony_ci *  2) Calls the do_ function to actually do the mapping/remapping work.
38362306a36Sopenharmony_ci *
38462306a36Sopenharmony_ci * The goal is to not allocate additional memory but to remap the existing
38562306a36Sopenharmony_ci * pages. In the case of an error the underlying memory is simply released back
38662306a36Sopenharmony_ci * to Xen and not remapped.
38762306a36Sopenharmony_ci */
38862306a36Sopenharmony_cistatic unsigned long __init xen_set_identity_and_remap_chunk(
38962306a36Sopenharmony_ci	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
39062306a36Sopenharmony_ci	unsigned long remap_pfn)
39162306a36Sopenharmony_ci{
39262306a36Sopenharmony_ci	unsigned long pfn;
39362306a36Sopenharmony_ci	unsigned long i = 0;
39462306a36Sopenharmony_ci	unsigned long n = end_pfn - start_pfn;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	if (remap_pfn == 0)
39762306a36Sopenharmony_ci		remap_pfn = nr_pages;
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	while (i < n) {
40062306a36Sopenharmony_ci		unsigned long cur_pfn = start_pfn + i;
40162306a36Sopenharmony_ci		unsigned long left = n - i;
40262306a36Sopenharmony_ci		unsigned long size = left;
40362306a36Sopenharmony_ci		unsigned long remap_range_size;
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci		/* Do not remap pages beyond the current allocation */
40662306a36Sopenharmony_ci		if (cur_pfn >= nr_pages) {
40762306a36Sopenharmony_ci			/* Identity map remaining pages */
40862306a36Sopenharmony_ci			set_phys_range_identity(cur_pfn, cur_pfn + size);
40962306a36Sopenharmony_ci			break;
41062306a36Sopenharmony_ci		}
41162306a36Sopenharmony_ci		if (cur_pfn + size > nr_pages)
41262306a36Sopenharmony_ci			size = nr_pages - cur_pfn;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci		remap_range_size = xen_find_pfn_range(&remap_pfn);
41562306a36Sopenharmony_ci		if (!remap_range_size) {
41662306a36Sopenharmony_ci			pr_warn("Unable to find available pfn range, not remapping identity pages\n");
41762306a36Sopenharmony_ci			xen_set_identity_and_release_chunk(cur_pfn,
41862306a36Sopenharmony_ci						cur_pfn + left, nr_pages);
41962306a36Sopenharmony_ci			break;
42062306a36Sopenharmony_ci		}
42162306a36Sopenharmony_ci		/* Adjust size to fit in current e820 RAM region */
42262306a36Sopenharmony_ci		if (size > remap_range_size)
42362306a36Sopenharmony_ci			size = remap_range_size;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci		/* Update variables to reflect new mappings. */
42862306a36Sopenharmony_ci		i += size;
42962306a36Sopenharmony_ci		remap_pfn += size;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	/*
43362306a36Sopenharmony_ci	 * If the PFNs are currently mapped, their VA mappings need to be
43462306a36Sopenharmony_ci	 * zapped.
43562306a36Sopenharmony_ci	 */
43662306a36Sopenharmony_ci	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
43762306a36Sopenharmony_ci		(void)HYPERVISOR_update_va_mapping(
43862306a36Sopenharmony_ci			(unsigned long)__va(pfn << PAGE_SHIFT),
43962306a36Sopenharmony_ci			native_make_pte(0), 0);
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	return remap_pfn;
44262306a36Sopenharmony_ci}
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_cistatic unsigned long __init xen_count_remap_pages(
44562306a36Sopenharmony_ci	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
44662306a36Sopenharmony_ci	unsigned long remap_pages)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	if (start_pfn >= nr_pages)
44962306a36Sopenharmony_ci		return remap_pages;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	return remap_pages + min(end_pfn, nr_pages) - start_pfn;
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_cistatic unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
45562306a36Sopenharmony_ci	unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
45662306a36Sopenharmony_ci			      unsigned long nr_pages, unsigned long last_val))
45762306a36Sopenharmony_ci{
45862306a36Sopenharmony_ci	phys_addr_t start = 0;
45962306a36Sopenharmony_ci	unsigned long ret_val = 0;
46062306a36Sopenharmony_ci	const struct e820_entry *entry = xen_e820_table.entries;
46162306a36Sopenharmony_ci	int i;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	/*
46462306a36Sopenharmony_ci	 * Combine non-RAM regions and gaps until a RAM region (or the
46562306a36Sopenharmony_ci	 * end of the map) is reached, then call the provided function
46662306a36Sopenharmony_ci	 * to perform its duty on the non-RAM region.
46762306a36Sopenharmony_ci	 *
46862306a36Sopenharmony_ci	 * The combined non-RAM regions are rounded to a whole number
46962306a36Sopenharmony_ci	 * of pages so any partial pages are accessible via the 1:1
47062306a36Sopenharmony_ci	 * mapping.  This is needed for some BIOSes that put (for
47162306a36Sopenharmony_ci	 * example) the DMI tables in a reserved region that begins on
47262306a36Sopenharmony_ci	 * a non-page boundary.
47362306a36Sopenharmony_ci	 */
47462306a36Sopenharmony_ci	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
47562306a36Sopenharmony_ci		phys_addr_t end = entry->addr + entry->size;
47662306a36Sopenharmony_ci		if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
47762306a36Sopenharmony_ci			unsigned long start_pfn = PFN_DOWN(start);
47862306a36Sopenharmony_ci			unsigned long end_pfn = PFN_UP(end);
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci			if (entry->type == E820_TYPE_RAM)
48162306a36Sopenharmony_ci				end_pfn = PFN_UP(entry->addr);
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci			if (start_pfn < end_pfn)
48462306a36Sopenharmony_ci				ret_val = func(start_pfn, end_pfn, nr_pages,
48562306a36Sopenharmony_ci					       ret_val);
48662306a36Sopenharmony_ci			start = end;
48762306a36Sopenharmony_ci		}
48862306a36Sopenharmony_ci	}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	return ret_val;
49162306a36Sopenharmony_ci}
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci/*
49462306a36Sopenharmony_ci * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
49562306a36Sopenharmony_ci * The remap information (which mfn remap to which pfn) is contained in the
49662306a36Sopenharmony_ci * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
49762306a36Sopenharmony_ci * This scheme allows to remap the different chunks in arbitrary order while
49862306a36Sopenharmony_ci * the resulting mapping will be independent from the order.
49962306a36Sopenharmony_ci */
50062306a36Sopenharmony_civoid __init xen_remap_memory(void)
50162306a36Sopenharmony_ci{
50262306a36Sopenharmony_ci	unsigned long buf = (unsigned long)&xen_remap_buf;
50362306a36Sopenharmony_ci	unsigned long mfn_save, pfn;
50462306a36Sopenharmony_ci	unsigned long remapped = 0;
50562306a36Sopenharmony_ci	unsigned int i;
50662306a36Sopenharmony_ci	unsigned long pfn_s = ~0UL;
50762306a36Sopenharmony_ci	unsigned long len = 0;
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	mfn_save = virt_to_mfn((void *)buf);
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
51262306a36Sopenharmony_ci		/* Map the remap information */
51362306a36Sopenharmony_ci		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci		pfn = xen_remap_buf.target_pfn;
51862306a36Sopenharmony_ci		for (i = 0; i < xen_remap_buf.size; i++) {
51962306a36Sopenharmony_ci			xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
52062306a36Sopenharmony_ci			remapped++;
52162306a36Sopenharmony_ci			pfn++;
52262306a36Sopenharmony_ci		}
52362306a36Sopenharmony_ci		if (pfn_s == ~0UL || pfn == pfn_s) {
52462306a36Sopenharmony_ci			pfn_s = xen_remap_buf.target_pfn;
52562306a36Sopenharmony_ci			len += xen_remap_buf.size;
52662306a36Sopenharmony_ci		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
52762306a36Sopenharmony_ci			len += xen_remap_buf.size;
52862306a36Sopenharmony_ci		} else {
52962306a36Sopenharmony_ci			xen_del_extra_mem(pfn_s, len);
53062306a36Sopenharmony_ci			pfn_s = xen_remap_buf.target_pfn;
53162306a36Sopenharmony_ci			len = xen_remap_buf.size;
53262306a36Sopenharmony_ci		}
53362306a36Sopenharmony_ci		xen_remap_mfn = xen_remap_buf.next_area_mfn;
53462306a36Sopenharmony_ci	}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	if (pfn_s != ~0UL && len)
53762306a36Sopenharmony_ci		xen_del_extra_mem(pfn_s, len);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	pr_info("Remapped %ld page(s)\n", remapped);
54262306a36Sopenharmony_ci}
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_cistatic unsigned long __init xen_get_pages_limit(void)
54562306a36Sopenharmony_ci{
54662306a36Sopenharmony_ci	unsigned long limit;
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	limit = MAXMEM / PAGE_SIZE;
54962306a36Sopenharmony_ci	if (!xen_initial_domain() && xen_512gb_limit)
55062306a36Sopenharmony_ci		limit = GB(512) / PAGE_SIZE;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	return limit;
55362306a36Sopenharmony_ci}
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_cistatic unsigned long __init xen_get_max_pages(void)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	unsigned long max_pages, limit;
55862306a36Sopenharmony_ci	domid_t domid = DOMID_SELF;
55962306a36Sopenharmony_ci	long ret;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	limit = xen_get_pages_limit();
56262306a36Sopenharmony_ci	max_pages = limit;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	/*
56562306a36Sopenharmony_ci	 * For the initial domain we use the maximum reservation as
56662306a36Sopenharmony_ci	 * the maximum page.
56762306a36Sopenharmony_ci	 *
56862306a36Sopenharmony_ci	 * For guest domains the current maximum reservation reflects
56962306a36Sopenharmony_ci	 * the current maximum rather than the static maximum. In this
57062306a36Sopenharmony_ci	 * case the e820 map provided to us will cover the static
57162306a36Sopenharmony_ci	 * maximum region.
57262306a36Sopenharmony_ci	 */
57362306a36Sopenharmony_ci	if (xen_initial_domain()) {
57462306a36Sopenharmony_ci		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
57562306a36Sopenharmony_ci		if (ret > 0)
57662306a36Sopenharmony_ci			max_pages = ret;
57762306a36Sopenharmony_ci	}
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	return min(max_pages, limit);
58062306a36Sopenharmony_ci}
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_cistatic void __init xen_align_and_add_e820_region(phys_addr_t start,
58362306a36Sopenharmony_ci						 phys_addr_t size, int type)
58462306a36Sopenharmony_ci{
58562306a36Sopenharmony_ci	phys_addr_t end = start + size;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	/* Align RAM regions to page boundaries. */
58862306a36Sopenharmony_ci	if (type == E820_TYPE_RAM) {
58962306a36Sopenharmony_ci		start = PAGE_ALIGN(start);
59062306a36Sopenharmony_ci		end &= ~((phys_addr_t)PAGE_SIZE - 1);
59162306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
59262306a36Sopenharmony_ci		/*
59362306a36Sopenharmony_ci		 * Don't allow adding memory not in E820 map while booting the
59462306a36Sopenharmony_ci		 * system. Once the balloon driver is up it will remove that
59562306a36Sopenharmony_ci		 * restriction again.
59662306a36Sopenharmony_ci		 */
59762306a36Sopenharmony_ci		max_mem_size = end;
59862306a36Sopenharmony_ci#endif
59962306a36Sopenharmony_ci	}
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	e820__range_add(start, end - start, type);
60262306a36Sopenharmony_ci}
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_cistatic void __init xen_ignore_unusable(void)
60562306a36Sopenharmony_ci{
60662306a36Sopenharmony_ci	struct e820_entry *entry = xen_e820_table.entries;
60762306a36Sopenharmony_ci	unsigned int i;
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
61062306a36Sopenharmony_ci		if (entry->type == E820_TYPE_UNUSABLE)
61162306a36Sopenharmony_ci			entry->type = E820_TYPE_RAM;
61262306a36Sopenharmony_ci	}
61362306a36Sopenharmony_ci}
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_cibool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
61662306a36Sopenharmony_ci{
61762306a36Sopenharmony_ci	struct e820_entry *entry;
61862306a36Sopenharmony_ci	unsigned mapcnt;
61962306a36Sopenharmony_ci	phys_addr_t end;
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	if (!size)
62262306a36Sopenharmony_ci		return false;
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	end = start + size;
62562306a36Sopenharmony_ci	entry = xen_e820_table.entries;
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
62862306a36Sopenharmony_ci		if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
62962306a36Sopenharmony_ci		    (entry->addr + entry->size) >= end)
63062306a36Sopenharmony_ci			return false;
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci		entry++;
63362306a36Sopenharmony_ci	}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	return true;
63662306a36Sopenharmony_ci}
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci/*
63962306a36Sopenharmony_ci * Find a free area in physical memory not yet reserved and compliant with
64062306a36Sopenharmony_ci * E820 map.
64162306a36Sopenharmony_ci * Used to relocate pre-allocated areas like initrd or p2m list which are in
64262306a36Sopenharmony_ci * conflict with the to be used E820 map.
64362306a36Sopenharmony_ci * In case no area is found, return 0. Otherwise return the physical address
64462306a36Sopenharmony_ci * of the area which is already reserved for convenience.
64562306a36Sopenharmony_ci */
64662306a36Sopenharmony_ciphys_addr_t __init xen_find_free_area(phys_addr_t size)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	unsigned mapcnt;
64962306a36Sopenharmony_ci	phys_addr_t addr, start;
65062306a36Sopenharmony_ci	struct e820_entry *entry = xen_e820_table.entries;
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
65362306a36Sopenharmony_ci		if (entry->type != E820_TYPE_RAM || entry->size < size)
65462306a36Sopenharmony_ci			continue;
65562306a36Sopenharmony_ci		start = entry->addr;
65662306a36Sopenharmony_ci		for (addr = start; addr < start + size; addr += PAGE_SIZE) {
65762306a36Sopenharmony_ci			if (!memblock_is_reserved(addr))
65862306a36Sopenharmony_ci				continue;
65962306a36Sopenharmony_ci			start = addr + PAGE_SIZE;
66062306a36Sopenharmony_ci			if (start + size > entry->addr + entry->size)
66162306a36Sopenharmony_ci				break;
66262306a36Sopenharmony_ci		}
66362306a36Sopenharmony_ci		if (addr >= start + size) {
66462306a36Sopenharmony_ci			memblock_reserve(start, size);
66562306a36Sopenharmony_ci			return start;
66662306a36Sopenharmony_ci		}
66762306a36Sopenharmony_ci	}
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	return 0;
67062306a36Sopenharmony_ci}
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci/*
67362306a36Sopenharmony_ci * Like memcpy, but with physical addresses for dest and src.
67462306a36Sopenharmony_ci */
67562306a36Sopenharmony_cistatic void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
67662306a36Sopenharmony_ci				   phys_addr_t n)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	phys_addr_t dest_off, src_off, dest_len, src_len, len;
67962306a36Sopenharmony_ci	void *from, *to;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	while (n) {
68262306a36Sopenharmony_ci		dest_off = dest & ~PAGE_MASK;
68362306a36Sopenharmony_ci		src_off = src & ~PAGE_MASK;
68462306a36Sopenharmony_ci		dest_len = n;
68562306a36Sopenharmony_ci		if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
68662306a36Sopenharmony_ci			dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
68762306a36Sopenharmony_ci		src_len = n;
68862306a36Sopenharmony_ci		if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
68962306a36Sopenharmony_ci			src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
69062306a36Sopenharmony_ci		len = min(dest_len, src_len);
69162306a36Sopenharmony_ci		to = early_memremap(dest - dest_off, dest_len + dest_off);
69262306a36Sopenharmony_ci		from = early_memremap(src - src_off, src_len + src_off);
69362306a36Sopenharmony_ci		memcpy(to, from, len);
69462306a36Sopenharmony_ci		early_memunmap(to, dest_len + dest_off);
69562306a36Sopenharmony_ci		early_memunmap(from, src_len + src_off);
69662306a36Sopenharmony_ci		n -= len;
69762306a36Sopenharmony_ci		dest += len;
69862306a36Sopenharmony_ci		src += len;
69962306a36Sopenharmony_ci	}
70062306a36Sopenharmony_ci}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci/*
70362306a36Sopenharmony_ci * Reserve Xen mfn_list.
70462306a36Sopenharmony_ci */
70562306a36Sopenharmony_cistatic void __init xen_reserve_xen_mfnlist(void)
70662306a36Sopenharmony_ci{
70762306a36Sopenharmony_ci	phys_addr_t start, size;
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	if (xen_start_info->mfn_list >= __START_KERNEL_map) {
71062306a36Sopenharmony_ci		start = __pa(xen_start_info->mfn_list);
71162306a36Sopenharmony_ci		size = PFN_ALIGN(xen_start_info->nr_pages *
71262306a36Sopenharmony_ci				 sizeof(unsigned long));
71362306a36Sopenharmony_ci	} else {
71462306a36Sopenharmony_ci		start = PFN_PHYS(xen_start_info->first_p2m_pfn);
71562306a36Sopenharmony_ci		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
71662306a36Sopenharmony_ci	}
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	memblock_reserve(start, size);
71962306a36Sopenharmony_ci	if (!xen_is_e820_reserved(start, size))
72062306a36Sopenharmony_ci		return;
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	xen_relocate_p2m();
72362306a36Sopenharmony_ci	memblock_phys_free(start, size);
72462306a36Sopenharmony_ci}
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci/**
72762306a36Sopenharmony_ci * xen_memory_setup - Hook for machine specific memory setup.
72862306a36Sopenharmony_ci **/
72962306a36Sopenharmony_cichar * __init xen_memory_setup(void)
73062306a36Sopenharmony_ci{
73162306a36Sopenharmony_ci	unsigned long max_pfn, pfn_s, n_pfns;
73262306a36Sopenharmony_ci	phys_addr_t mem_end, addr, size, chunk_size;
73362306a36Sopenharmony_ci	u32 type;
73462306a36Sopenharmony_ci	int rc;
73562306a36Sopenharmony_ci	struct xen_memory_map memmap;
73662306a36Sopenharmony_ci	unsigned long max_pages;
73762306a36Sopenharmony_ci	unsigned long extra_pages = 0;
73862306a36Sopenharmony_ci	int i;
73962306a36Sopenharmony_ci	int op;
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	xen_parse_512gb();
74262306a36Sopenharmony_ci	max_pfn = xen_get_pages_limit();
74362306a36Sopenharmony_ci	max_pfn = min(max_pfn, xen_start_info->nr_pages);
74462306a36Sopenharmony_ci	mem_end = PFN_PHYS(max_pfn);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
74762306a36Sopenharmony_ci	set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
75062306a36Sopenharmony_ci	xen_saved_max_mem_size = max_mem_size;
75162306a36Sopenharmony_ci#endif
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	op = xen_initial_domain() ?
75462306a36Sopenharmony_ci		XENMEM_machine_memory_map :
75562306a36Sopenharmony_ci		XENMEM_memory_map;
75662306a36Sopenharmony_ci	rc = HYPERVISOR_memory_op(op, &memmap);
75762306a36Sopenharmony_ci	if (rc == -ENOSYS) {
75862306a36Sopenharmony_ci		BUG_ON(xen_initial_domain());
75962306a36Sopenharmony_ci		memmap.nr_entries = 1;
76062306a36Sopenharmony_ci		xen_e820_table.entries[0].addr = 0ULL;
76162306a36Sopenharmony_ci		xen_e820_table.entries[0].size = mem_end;
76262306a36Sopenharmony_ci		/* 8MB slack (to balance backend allocations). */
76362306a36Sopenharmony_ci		xen_e820_table.entries[0].size += 8ULL << 20;
76462306a36Sopenharmony_ci		xen_e820_table.entries[0].type = E820_TYPE_RAM;
76562306a36Sopenharmony_ci		rc = 0;
76662306a36Sopenharmony_ci	}
76762306a36Sopenharmony_ci	BUG_ON(rc);
76862306a36Sopenharmony_ci	BUG_ON(memmap.nr_entries == 0);
76962306a36Sopenharmony_ci	xen_e820_table.nr_entries = memmap.nr_entries;
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	if (xen_initial_domain()) {
77262306a36Sopenharmony_ci		/*
77362306a36Sopenharmony_ci		 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
77462306a36Sopenharmony_ci		 * regions, so if we're using the machine memory map leave the
77562306a36Sopenharmony_ci		 * region as RAM as it is in the pseudo-physical map.
77662306a36Sopenharmony_ci		 *
77762306a36Sopenharmony_ci		 * UNUSABLE regions in domUs are not handled and will need
77862306a36Sopenharmony_ci		 * a patch in the future.
77962306a36Sopenharmony_ci		 */
78062306a36Sopenharmony_ci		xen_ignore_unusable();
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci#ifdef CONFIG_ISCSI_IBFT_FIND
78362306a36Sopenharmony_ci		/* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */
78462306a36Sopenharmony_ci		xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START;
78562306a36Sopenharmony_ci		xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START;
78662306a36Sopenharmony_ci		xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED;
78762306a36Sopenharmony_ci		xen_e820_table.nr_entries++;
78862306a36Sopenharmony_ci#endif
78962306a36Sopenharmony_ci	}
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	/* Make sure the Xen-supplied memory map is well-ordered. */
79262306a36Sopenharmony_ci	e820__update_table(&xen_e820_table);
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	max_pages = xen_get_max_pages();
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	/* How many extra pages do we need due to remapping? */
79762306a36Sopenharmony_ci	max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	if (max_pages > max_pfn)
80062306a36Sopenharmony_ci		extra_pages += max_pages - max_pfn;
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	/*
80362306a36Sopenharmony_ci	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
80462306a36Sopenharmony_ci	 * factor the base size.
80562306a36Sopenharmony_ci	 *
80662306a36Sopenharmony_ci	 * Make sure we have no memory above max_pages, as this area
80762306a36Sopenharmony_ci	 * isn't handled by the p2m management.
80862306a36Sopenharmony_ci	 */
80962306a36Sopenharmony_ci	extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
81062306a36Sopenharmony_ci			   extra_pages, max_pages - max_pfn);
81162306a36Sopenharmony_ci	i = 0;
81262306a36Sopenharmony_ci	addr = xen_e820_table.entries[0].addr;
81362306a36Sopenharmony_ci	size = xen_e820_table.entries[0].size;
81462306a36Sopenharmony_ci	while (i < xen_e820_table.nr_entries) {
81562306a36Sopenharmony_ci		bool discard = false;
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci		chunk_size = size;
81862306a36Sopenharmony_ci		type = xen_e820_table.entries[i].type;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci		if (type == E820_TYPE_RESERVED)
82162306a36Sopenharmony_ci			xen_pv_pci_possible = true;
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci		if (type == E820_TYPE_RAM) {
82462306a36Sopenharmony_ci			if (addr < mem_end) {
82562306a36Sopenharmony_ci				chunk_size = min(size, mem_end - addr);
82662306a36Sopenharmony_ci			} else if (extra_pages) {
82762306a36Sopenharmony_ci				chunk_size = min(size, PFN_PHYS(extra_pages));
82862306a36Sopenharmony_ci				pfn_s = PFN_UP(addr);
82962306a36Sopenharmony_ci				n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
83062306a36Sopenharmony_ci				extra_pages -= n_pfns;
83162306a36Sopenharmony_ci				xen_add_extra_mem(pfn_s, n_pfns);
83262306a36Sopenharmony_ci				xen_max_p2m_pfn = pfn_s + n_pfns;
83362306a36Sopenharmony_ci			} else
83462306a36Sopenharmony_ci				discard = true;
83562306a36Sopenharmony_ci		}
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci		if (!discard)
83862306a36Sopenharmony_ci			xen_align_and_add_e820_region(addr, chunk_size, type);
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci		addr += chunk_size;
84162306a36Sopenharmony_ci		size -= chunk_size;
84262306a36Sopenharmony_ci		if (size == 0) {
84362306a36Sopenharmony_ci			i++;
84462306a36Sopenharmony_ci			if (i < xen_e820_table.nr_entries) {
84562306a36Sopenharmony_ci				addr = xen_e820_table.entries[i].addr;
84662306a36Sopenharmony_ci				size = xen_e820_table.entries[i].size;
84762306a36Sopenharmony_ci			}
84862306a36Sopenharmony_ci		}
84962306a36Sopenharmony_ci	}
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	/*
85262306a36Sopenharmony_ci	 * Set the rest as identity mapped, in case PCI BARs are
85362306a36Sopenharmony_ci	 * located here.
85462306a36Sopenharmony_ci	 */
85562306a36Sopenharmony_ci	set_phys_range_identity(addr / PAGE_SIZE, ~0ul);
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	/*
85862306a36Sopenharmony_ci	 * In domU, the ISA region is normal, usable memory, but we
85962306a36Sopenharmony_ci	 * reserve ISA memory anyway because too many things poke
86062306a36Sopenharmony_ci	 * about in there.
86162306a36Sopenharmony_ci	 */
86262306a36Sopenharmony_ci	e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_TYPE_RESERVED);
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci	e820__update_table(e820_table);
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	/*
86762306a36Sopenharmony_ci	 * Check whether the kernel itself conflicts with the target E820 map.
86862306a36Sopenharmony_ci	 * Failing now is better than running into weird problems later due
86962306a36Sopenharmony_ci	 * to relocating (and even reusing) pages with kernel text or data.
87062306a36Sopenharmony_ci	 */
87162306a36Sopenharmony_ci	if (xen_is_e820_reserved(__pa_symbol(_text),
87262306a36Sopenharmony_ci			__pa_symbol(__bss_stop) - __pa_symbol(_text))) {
87362306a36Sopenharmony_ci		xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
87462306a36Sopenharmony_ci		BUG();
87562306a36Sopenharmony_ci	}
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	/*
87862306a36Sopenharmony_ci	 * Check for a conflict of the hypervisor supplied page tables with
87962306a36Sopenharmony_ci	 * the target E820 map.
88062306a36Sopenharmony_ci	 */
88162306a36Sopenharmony_ci	xen_pt_check_e820();
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	xen_reserve_xen_mfnlist();
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	/* Check for a conflict of the initrd with the target E820 map. */
88662306a36Sopenharmony_ci	if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
88762306a36Sopenharmony_ci				 boot_params.hdr.ramdisk_size)) {
88862306a36Sopenharmony_ci		phys_addr_t new_area, start, size;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci		new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
89162306a36Sopenharmony_ci		if (!new_area) {
89262306a36Sopenharmony_ci			xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
89362306a36Sopenharmony_ci			BUG();
89462306a36Sopenharmony_ci		}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci		start = boot_params.hdr.ramdisk_image;
89762306a36Sopenharmony_ci		size = boot_params.hdr.ramdisk_size;
89862306a36Sopenharmony_ci		xen_phys_memcpy(new_area, start, size);
89962306a36Sopenharmony_ci		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
90062306a36Sopenharmony_ci			start, start + size, new_area, new_area + size);
90162306a36Sopenharmony_ci		memblock_phys_free(start, size);
90262306a36Sopenharmony_ci		boot_params.hdr.ramdisk_image = new_area;
90362306a36Sopenharmony_ci		boot_params.ext_ramdisk_image = new_area >> 32;
90462306a36Sopenharmony_ci	}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	/*
90762306a36Sopenharmony_ci	 * Set identity map on non-RAM pages and prepare remapping the
90862306a36Sopenharmony_ci	 * underlying RAM.
90962306a36Sopenharmony_ci	 */
91062306a36Sopenharmony_ci	xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	pr_info("Released %ld page(s)\n", xen_released_pages);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	return "Xen";
91562306a36Sopenharmony_ci}
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_cistatic int register_callback(unsigned type, const void *func)
91862306a36Sopenharmony_ci{
91962306a36Sopenharmony_ci	struct callback_register callback = {
92062306a36Sopenharmony_ci		.type = type,
92162306a36Sopenharmony_ci		.address = XEN_CALLBACK(__KERNEL_CS, func),
92262306a36Sopenharmony_ci		.flags = CALLBACKF_mask_events,
92362306a36Sopenharmony_ci	};
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
92662306a36Sopenharmony_ci}
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_civoid xen_enable_sysenter(void)
92962306a36Sopenharmony_ci{
93062306a36Sopenharmony_ci	if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
93162306a36Sopenharmony_ci	    register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
93262306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
93362306a36Sopenharmony_ci}
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_civoid xen_enable_syscall(void)
93662306a36Sopenharmony_ci{
93762306a36Sopenharmony_ci	int ret;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
94062306a36Sopenharmony_ci	if (ret != 0) {
94162306a36Sopenharmony_ci		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
94262306a36Sopenharmony_ci		/* Pretty fatal; 64-bit userspace has no other
94362306a36Sopenharmony_ci		   mechanism for syscalls. */
94462306a36Sopenharmony_ci	}
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
94762306a36Sopenharmony_ci	    register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
94862306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
94962306a36Sopenharmony_ci}
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_cistatic void __init xen_pvmmu_arch_setup(void)
95262306a36Sopenharmony_ci{
95362306a36Sopenharmony_ci	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	if (register_callback(CALLBACKTYPE_event,
95662306a36Sopenharmony_ci			      xen_asm_exc_xen_hypervisor_callback) ||
95762306a36Sopenharmony_ci	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
95862306a36Sopenharmony_ci		BUG();
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	xen_enable_sysenter();
96162306a36Sopenharmony_ci	xen_enable_syscall();
96262306a36Sopenharmony_ci}
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci/* This function is not called for HVM domains */
96562306a36Sopenharmony_civoid __init xen_arch_setup(void)
96662306a36Sopenharmony_ci{
96762306a36Sopenharmony_ci	xen_panic_handler_init();
96862306a36Sopenharmony_ci	xen_pvmmu_arch_setup();
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci#ifdef CONFIG_ACPI
97162306a36Sopenharmony_ci	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
97262306a36Sopenharmony_ci		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
97362306a36Sopenharmony_ci		disable_acpi();
97462306a36Sopenharmony_ci	}
97562306a36Sopenharmony_ci#endif
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci	memcpy(boot_command_line, xen_start_info->cmd_line,
97862306a36Sopenharmony_ci	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
97962306a36Sopenharmony_ci	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	/* Set up idle, making sure it calls safe_halt() pvop */
98262306a36Sopenharmony_ci	disable_cpuidle();
98362306a36Sopenharmony_ci	disable_cpufreq();
98462306a36Sopenharmony_ci	WARN_ON(xen_set_default_idle());
98562306a36Sopenharmony_ci#ifdef CONFIG_NUMA
98662306a36Sopenharmony_ci	numa_off = 1;
98762306a36Sopenharmony_ci#endif
98862306a36Sopenharmony_ci}
989