162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Virtual Memory Map support
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * (C) 2007 sgi. Christoph Lameter.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
862306a36Sopenharmony_ci * virt_to_page, page_address() to be implemented as a base offset
962306a36Sopenharmony_ci * calculation without memory access.
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * However, virtual mappings need a page table and TLBs. Many Linux
1262306a36Sopenharmony_ci * architectures already map their physical space using 1-1 mappings
1362306a36Sopenharmony_ci * via TLBs. For those arches the virtual memory map is essentially
1462306a36Sopenharmony_ci * for free if we use the same page size as the 1-1 mappings. In that
1562306a36Sopenharmony_ci * case the overhead consists of a few additional pages that are
1662306a36Sopenharmony_ci * allocated to create a view of memory for vmemmap.
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci * The architecture is expected to provide a vmemmap_populate() function
1962306a36Sopenharmony_ci * to instantiate the mapping.
2062306a36Sopenharmony_ci */
2162306a36Sopenharmony_ci#include <linux/mm.h>
2262306a36Sopenharmony_ci#include <linux/mmzone.h>
2362306a36Sopenharmony_ci#include <linux/memblock.h>
2462306a36Sopenharmony_ci#include <linux/memremap.h>
2562306a36Sopenharmony_ci#include <linux/highmem.h>
2662306a36Sopenharmony_ci#include <linux/slab.h>
2762306a36Sopenharmony_ci#include <linux/spinlock.h>
2862306a36Sopenharmony_ci#include <linux/vmalloc.h>
2962306a36Sopenharmony_ci#include <linux/sched.h>
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci#include <asm/dma.h>
3262306a36Sopenharmony_ci#include <asm/pgalloc.h>
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci/*
3562306a36Sopenharmony_ci * Allocate a block of memory to be used to back the virtual memory map
3662306a36Sopenharmony_ci * or to back the page tables that are used to create the mapping.
3762306a36Sopenharmony_ci * Uses the main allocators if they are available, else bootmem.
3862306a36Sopenharmony_ci */
3962306a36Sopenharmony_ci
/*
 * Early-boot allocator used before the slab allocator is available.
 * __ref is required because this function (callable from __meminit code)
 * reaches into memblock; that is only valid while memblock is still alive.
 */
static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	/* _raw variant: callers are responsible for initializing the memory */
	return memblock_alloc_try_nid_raw(size, align, goal,
					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_civoid * __meminit vmemmap_alloc_block(unsigned long size, int node)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	/* If the main allocator is up use that, fallback to bootmem. */
5262306a36Sopenharmony_ci	if (slab_is_available()) {
5362306a36Sopenharmony_ci		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
5462306a36Sopenharmony_ci		int order = get_order(size);
5562306a36Sopenharmony_ci		static bool warned;
5662306a36Sopenharmony_ci		struct page *page;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci		page = alloc_pages_node(node, gfp_mask, order);
5962306a36Sopenharmony_ci		if (page)
6062306a36Sopenharmony_ci			return page_address(page);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci		if (!warned) {
6362306a36Sopenharmony_ci			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
6462306a36Sopenharmony_ci				   "vmemmap alloc failure: order:%u", order);
6562306a36Sopenharmony_ci			warned = true;
6662306a36Sopenharmony_ci		}
6762306a36Sopenharmony_ci		return NULL;
6862306a36Sopenharmony_ci	} else
6962306a36Sopenharmony_ci		return __earlyonly_bootmem_alloc(node, size, size,
7062306a36Sopenharmony_ci				__pa(MAX_DMA_ADDRESS));
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic void * __meminit altmap_alloc_block_buf(unsigned long size,
7462306a36Sopenharmony_ci					       struct vmem_altmap *altmap);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci/* need to make sure size is all the same during early stage */
7762306a36Sopenharmony_civoid * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
7862306a36Sopenharmony_ci					 struct vmem_altmap *altmap)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	void *ptr;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	if (altmap)
8362306a36Sopenharmony_ci		return altmap_alloc_block_buf(size, altmap);
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	ptr = sparse_buffer_alloc(size);
8662306a36Sopenharmony_ci	if (!ptr)
8762306a36Sopenharmony_ci		ptr = vmemmap_alloc_block(size, node);
8862306a36Sopenharmony_ci	return ptr;
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
/*
 * First unused pfn in the altmap: the base, plus the reserved head, plus
 * everything handed out so far (allocations and alignment padding).
 */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	unsigned long allocated = altmap->alloc + altmap->align;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	if (altmap->free > allocated)
10262306a36Sopenharmony_ci		return altmap->free - allocated;
10362306a36Sopenharmony_ci	return 0;
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
/*
 * Carve @size bytes out of the device-provided altmap. Allocations are
 * page granular and naturally aligned; returns a kernel virtual address
 * or NULL if the request is misaligned or the altmap is exhausted.
 */
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	/* Only whole pages can be handed out of the altmap. */
	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	/*
	 * Natural alignment: round pfn up to the largest power-of-two factor
	 * of nr_pfns (1 << index of its lowest set bit), then express that
	 * as the number of padding pfns to skip.
	 */
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	/* Account the allocation and the alignment padding separately. */
	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_civoid __meminit vmemmap_verify(pte_t *pte, int node,
13462306a36Sopenharmony_ci				unsigned long start, unsigned long end)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci	unsigned long pfn = pte_pfn(ptep_get(pte));
13762306a36Sopenharmony_ci	int actual_node = early_pfn_to_nid(pfn);
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
14062306a36Sopenharmony_ci		pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
14162306a36Sopenharmony_ci			start, end - 1);
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
/*
 * Ensure a vmemmap pte exists at @addr under @pmd. If the slot is empty:
 * without @reuse a fresh page is allocated (altmap-backed when @altmap is
 * given); with @reuse the given page is mapped again under an elevated
 * refcount. Returns the pte, or NULL on allocation failure.
 */
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(ptep_get(pte))) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only called by certain ZONE_DEVICE path,
			 * and through vmemmap_populate_compound_pages() when
			 * slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_cistatic void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	void *p = vmemmap_alloc_block(size, node);
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	if (!p)
18162306a36Sopenharmony_ci		return NULL;
18262306a36Sopenharmony_ci	memset(p, 0, size);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	return p;
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_cipmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	pmd_t *pmd = pmd_offset(pud, addr);
19062306a36Sopenharmony_ci	if (pmd_none(*pmd)) {
19162306a36Sopenharmony_ci		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
19262306a36Sopenharmony_ci		if (!p)
19362306a36Sopenharmony_ci			return NULL;
19462306a36Sopenharmony_ci		pmd_populate_kernel(&init_mm, pmd, p);
19562306a36Sopenharmony_ci	}
19662306a36Sopenharmony_ci	return pmd;
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
/*
 * Default no-op; architectures that need to initialize a freshly
 * allocated pmd page table override this weak symbol.
 */
void __weak __meminit pmd_init(void *addr)
{
}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_cipud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
20462306a36Sopenharmony_ci{
20562306a36Sopenharmony_ci	pud_t *pud = pud_offset(p4d, addr);
20662306a36Sopenharmony_ci	if (pud_none(*pud)) {
20762306a36Sopenharmony_ci		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
20862306a36Sopenharmony_ci		if (!p)
20962306a36Sopenharmony_ci			return NULL;
21062306a36Sopenharmony_ci		pmd_init(p);
21162306a36Sopenharmony_ci		pud_populate(&init_mm, pud, p);
21262306a36Sopenharmony_ci	}
21362306a36Sopenharmony_ci	return pud;
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ci
/*
 * Default no-op; architectures that need to initialize a freshly
 * allocated pud page table override this weak symbol.
 */
void __weak __meminit pud_init(void *addr)
{
}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cip4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	p4d_t *p4d = p4d_offset(pgd, addr);
22362306a36Sopenharmony_ci	if (p4d_none(*p4d)) {
22462306a36Sopenharmony_ci		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
22562306a36Sopenharmony_ci		if (!p)
22662306a36Sopenharmony_ci			return NULL;
22762306a36Sopenharmony_ci		pud_init(p);
22862306a36Sopenharmony_ci		p4d_populate(&init_mm, p4d, p);
22962306a36Sopenharmony_ci	}
23062306a36Sopenharmony_ci	return p4d;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cipgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	pgd_t *pgd = pgd_offset_k(addr);
23662306a36Sopenharmony_ci	if (pgd_none(*pgd)) {
23762306a36Sopenharmony_ci		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
23862306a36Sopenharmony_ci		if (!p)
23962306a36Sopenharmony_ci			return NULL;
24062306a36Sopenharmony_ci		pgd_populate(&init_mm, pgd, p);
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci	return pgd;
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ci
/*
 * Populate every page-table level for @addr (top-down) and install one
 * vmemmap base page there via vmemmap_pte_populate(). Returns the pte on
 * success, or NULL if any level's allocation failed.
 */
static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
					      struct vmem_altmap *altmap,
					      struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	/* Warn if the backing page landed on a distant NUMA node. */
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_cistatic int __meminit vmemmap_populate_range(unsigned long start,
27662306a36Sopenharmony_ci					    unsigned long end, int node,
27762306a36Sopenharmony_ci					    struct vmem_altmap *altmap,
27862306a36Sopenharmony_ci					    struct page *reuse)
27962306a36Sopenharmony_ci{
28062306a36Sopenharmony_ci	unsigned long addr = start;
28162306a36Sopenharmony_ci	pte_t *pte;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	for (; addr < end; addr += PAGE_SIZE) {
28462306a36Sopenharmony_ci		pte = vmemmap_populate_address(addr, node, altmap, reuse);
28562306a36Sopenharmony_ci		if (!pte)
28662306a36Sopenharmony_ci			return -ENOMEM;
28762306a36Sopenharmony_ci	}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	return 0;
29062306a36Sopenharmony_ci}
29162306a36Sopenharmony_ci
/*
 * Populate [start, end) of the vmemmap with individual base pages
 * (no page reuse). Returns 0 on success, -ENOMEM on failure.
 */
int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}
29762306a36Sopenharmony_ci
/*
 * Default no-op; architectures supporting huge vmemmap mappings override
 * this weak symbol to install @p as a PMD-sized mapping.
 */
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}
30262306a36Sopenharmony_ci
/*
 * Default: report "not a usable huge mapping" (0) so callers fall back
 * to base pages; architectures override this weak symbol to return
 * non-zero when the existing pmd already covers [addr, next).
 */
int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}
30862306a36Sopenharmony_ci
/*
 * Populate [start, end) of the vmemmap, preferring PMD-sized mappings and
 * falling back to base pages when a huge allocation is unavailable.
 * Returns 0 on success, -ENOMEM on failure.
 */
int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	/* Walk one pmd-sized chunk at a time. */
	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			/* Try to back the whole chunk with one PMD-sized block. */
			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			continue;
		/* Huge mapping unavailable: fill this chunk with base pages. */
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci#ifndef vmemmap_populate_compound_pages
36262306a36Sopenharmony_ci/*
36362306a36Sopenharmony_ci * For compound pages bigger than section size (e.g. x86 1G compound
36462306a36Sopenharmony_ci * pages with 2M subsection size) fill the rest of sections as tail
36562306a36Sopenharmony_ci * pages.
36662306a36Sopenharmony_ci *
36762306a36Sopenharmony_ci * Note that memremap_pages() resets @nr_range value and will increment
36862306a36Sopenharmony_ci * it after each range successful onlining. Thus the value or @nr_range
36962306a36Sopenharmony_ci * at section memmap populate corresponds to the in-progress range
37062306a36Sopenharmony_ci * being onlined here.
37162306a36Sopenharmony_ci */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	/*
	 * Reuse applies when this section starts mid-compound-page (offset
	 * not aligned to the compound size) and the compound page spans more
	 * than a subsection's worth of pfns — i.e. this section holds only
	 * tail pages of a compound page begun in an earlier section.
	 */
	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic pte_t * __meminit compound_section_tail_page(unsigned long addr)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	pte_t *pte;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	addr -= PAGE_SIZE;
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	/*
38962306a36Sopenharmony_ci	 * Assuming sections are populated sequentially, the previous section's
39062306a36Sopenharmony_ci	 * page data can be reused.
39162306a36Sopenharmony_ci	 */
39262306a36Sopenharmony_ci	pte = pte_offset_kernel(pmd_off_k(addr), addr);
39362306a36Sopenharmony_ci	if (!pte)
39462306a36Sopenharmony_ci		return NULL;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	return pte;
39762306a36Sopenharmony_ci}
39862306a36Sopenharmony_ci
/*
 * Populate a section's vmemmap for device compound pages, deduplicating
 * tail-page struct pages: per compound page only a head vmemmap page and
 * one tail vmemmap page are allocated; the remaining range remaps the
 * tail page. Returns 0 on success, -ENOMEM on failure.
 */
static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(ptep_get(pte)));
	}

	/* vmemmap bytes covered by one compound page (capped to the range). */
	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(ptep_get(pte)));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci#endif
45162306a36Sopenharmony_ci
/*
 * Populate the struct page memmap for @nr_pages starting at @pfn on node
 * @nid. Both @pfn and @nr_pages must be subsection aligned. Uses the
 * deduplicated compound-page path when vmemmap_can_optimize() allows,
 * otherwise the architecture's vmemmap_populate(). Returns the first
 * struct page, or NULL on failure.
 */
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_can_optimize(altmap, pgmap))
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	return pfn_to_page(pfn);
}
474