162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/mm.h> 362306a36Sopenharmony_ci#include <linux/mmzone.h> 462306a36Sopenharmony_ci#include <linux/memblock.h> 562306a36Sopenharmony_ci#include <linux/page_ext.h> 662306a36Sopenharmony_ci#include <linux/memory.h> 762306a36Sopenharmony_ci#include <linux/vmalloc.h> 862306a36Sopenharmony_ci#include <linux/kmemleak.h> 962306a36Sopenharmony_ci#include <linux/page_owner.h> 1062306a36Sopenharmony_ci#include <linux/page_idle.h> 1162306a36Sopenharmony_ci#include <linux/page_table_check.h> 1262306a36Sopenharmony_ci#include <linux/rcupdate.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/* 1562306a36Sopenharmony_ci * struct page extension 1662306a36Sopenharmony_ci * 1762306a36Sopenharmony_ci * This is the feature to manage memory for extended data per page. 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * Until now, we must modify struct page itself to store extra data per page. 2062306a36Sopenharmony_ci * This requires rebuilding the kernel and it is really time consuming process. 2162306a36Sopenharmony_ci * And, sometimes, rebuild is impossible due to third party module dependency. 2262306a36Sopenharmony_ci * At last, enlarging struct page could cause un-wanted system behaviour change. 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * This feature is intended to overcome above mentioned problems. This feature 2562306a36Sopenharmony_ci * allocates memory for extended data per page in certain place rather than 2662306a36Sopenharmony_ci * the struct page itself. This memory can be accessed by the accessor 2762306a36Sopenharmony_ci * functions provided by this code. During the boot process, it checks whether 2862306a36Sopenharmony_ci * allocation of huge chunk of memory is needed or not. If not, it avoids 2962306a36Sopenharmony_ci * allocating memory at all. With this advantage, we can include this feature 3062306a36Sopenharmony_ci * into the kernel in default and can avoid rebuild and solve related problems. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci * To help these things to work well, there are two callbacks for clients. One 3362306a36Sopenharmony_ci * is the need callback which is mandatory if user wants to avoid useless 3462306a36Sopenharmony_ci * memory allocation at boot-time. The other is optional, init callback, which 3562306a36Sopenharmony_ci * is used to do proper initialization after memory is allocated. 3662306a36Sopenharmony_ci * 3762306a36Sopenharmony_ci * The need callback is used to decide whether extended memory allocation is 3862306a36Sopenharmony_ci * needed or not. Sometimes users want to deactivate some features in this 3962306a36Sopenharmony_ci * boot and extra memory would be unnecessary. In this case, to avoid 4062306a36Sopenharmony_ci * allocating huge chunk of memory, each clients represent their need of 4162306a36Sopenharmony_ci * extra memory through the need callback. If one of the need callbacks 4262306a36Sopenharmony_ci * returns true, it means that someone needs extra memory so that 4362306a36Sopenharmony_ci * page extension core should allocates memory for page extension. If 4462306a36Sopenharmony_ci * none of need callbacks return true, memory isn't needed at all in this boot 4562306a36Sopenharmony_ci * and page extension core can skip to allocate memory. As result, 4662306a36Sopenharmony_ci * none of memory is wasted. 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * When need callback returns true, page_ext checks if there is a request for 4962306a36Sopenharmony_ci * extra memory through size in struct page_ext_operations. If it is non-zero, 5062306a36Sopenharmony_ci * extra space is allocated for each page_ext entry and offset is returned to 5162306a36Sopenharmony_ci * user through offset in struct page_ext_operations. 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * The init callback is used to do proper initialization after page extension 5462306a36Sopenharmony_ci * is completely initialized. In sparse memory system, extra memory is 5562306a36Sopenharmony_ci * allocated some time later than memmap is allocated. In other words, lifetime 5662306a36Sopenharmony_ci * of memory for page extension isn't same with memmap for struct page. 5762306a36Sopenharmony_ci * Therefore, clients can't store extra data until page extension is 5862306a36Sopenharmony_ci * initialized, even if pages are allocated and used freely. This could 5962306a36Sopenharmony_ci * cause inadequate state of extra data per page, so, to prevent it, client 6062306a36Sopenharmony_ci * can utilize this callback to initialize the state of it correctly. 6162306a36Sopenharmony_ci */ 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM 6462306a36Sopenharmony_ci#define PAGE_EXT_INVALID (0x1) 6562306a36Sopenharmony_ci#endif 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) 6862306a36Sopenharmony_cistatic bool need_page_idle(void) 6962306a36Sopenharmony_ci{ 7062306a36Sopenharmony_ci return true; 7162306a36Sopenharmony_ci} 7262306a36Sopenharmony_cistatic struct page_ext_operations page_idle_ops __initdata = { 7362306a36Sopenharmony_ci .need = need_page_idle, 7462306a36Sopenharmony_ci .need_shared_flags = true, 7562306a36Sopenharmony_ci}; 7662306a36Sopenharmony_ci#endif 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic struct page_ext_operations *page_ext_ops[] __initdata = { 7962306a36Sopenharmony_ci#ifdef CONFIG_PAGE_OWNER 8062306a36Sopenharmony_ci &page_owner_ops, 8162306a36Sopenharmony_ci#endif 8262306a36Sopenharmony_ci#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) 8362306a36Sopenharmony_ci &page_idle_ops, 8462306a36Sopenharmony_ci#endif 8562306a36Sopenharmony_ci#ifdef CONFIG_PAGE_TABLE_CHECK 8662306a36Sopenharmony_ci &page_table_check_ops, 8762306a36Sopenharmony_ci#endif 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ciunsigned long page_ext_size; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_cistatic unsigned long total_usage; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cibool early_page_ext __meminitdata; 9562306a36Sopenharmony_cistatic int __init setup_early_page_ext(char *str) 9662306a36Sopenharmony_ci{ 9762306a36Sopenharmony_ci early_page_ext = true; 9862306a36Sopenharmony_ci return 0; 9962306a36Sopenharmony_ci} 10062306a36Sopenharmony_ciearly_param("early_page_ext", setup_early_page_ext); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_cistatic bool __init invoke_need_callbacks(void) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci int i; 10562306a36Sopenharmony_ci int entries = ARRAY_SIZE(page_ext_ops); 10662306a36Sopenharmony_ci bool need = false; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci for (i = 0; i < entries; i++) { 10962306a36Sopenharmony_ci if (page_ext_ops[i]->need()) { 11062306a36Sopenharmony_ci if (page_ext_ops[i]->need_shared_flags) { 11162306a36Sopenharmony_ci page_ext_size = sizeof(struct page_ext); 11262306a36Sopenharmony_ci break; 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci } 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci for (i = 0; i < entries; i++) { 11862306a36Sopenharmony_ci if (page_ext_ops[i]->need()) { 11962306a36Sopenharmony_ci page_ext_ops[i]->offset = page_ext_size; 12062306a36Sopenharmony_ci page_ext_size += page_ext_ops[i]->size; 12162306a36Sopenharmony_ci need = true; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci return need; 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_cistatic void __init invoke_init_callbacks(void) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci int i; 13162306a36Sopenharmony_ci int entries = ARRAY_SIZE(page_ext_ops); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci for (i = 0; i < entries; i++) { 13462306a36Sopenharmony_ci if (page_ext_ops[i]->init) 13562306a36Sopenharmony_ci page_ext_ops[i]->init(); 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cistatic inline struct page_ext *get_entry(void *base, unsigned long index) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci return base + page_ext_size * index; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci#ifndef CONFIG_SPARSEMEM 14562306a36Sopenharmony_civoid __init page_ext_init_flatmem_late(void) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci invoke_init_callbacks(); 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_civoid __meminit pgdat_page_ext_init(struct pglist_data *pgdat) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci pgdat->node_page_ext = NULL; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic struct page_ext *lookup_page_ext(const struct page *page) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci unsigned long pfn = page_to_pfn(page); 15862306a36Sopenharmony_ci unsigned long index; 15962306a36Sopenharmony_ci struct page_ext *base; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci WARN_ON_ONCE(!rcu_read_lock_held()); 16262306a36Sopenharmony_ci base = NODE_DATA(page_to_nid(page))->node_page_ext; 16362306a36Sopenharmony_ci /* 16462306a36Sopenharmony_ci * The sanity checks the page allocator does upon freeing a 16562306a36Sopenharmony_ci * page can reach here before the page_ext arrays are 16662306a36Sopenharmony_ci * allocated when feeding a range of pages to the allocator 16762306a36Sopenharmony_ci * for the first time during bootup or memory hotplug. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci if (unlikely(!base)) 17062306a36Sopenharmony_ci return NULL; 17162306a36Sopenharmony_ci index = pfn - round_down(node_start_pfn(page_to_nid(page)), 17262306a36Sopenharmony_ci MAX_ORDER_NR_PAGES); 17362306a36Sopenharmony_ci return get_entry(base, index); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistatic int __init alloc_node_page_ext(int nid) 17762306a36Sopenharmony_ci{ 17862306a36Sopenharmony_ci struct page_ext *base; 17962306a36Sopenharmony_ci unsigned long table_size; 18062306a36Sopenharmony_ci unsigned long nr_pages; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci nr_pages = NODE_DATA(nid)->node_spanned_pages; 18362306a36Sopenharmony_ci if (!nr_pages) 18462306a36Sopenharmony_ci return 0; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci /* 18762306a36Sopenharmony_ci * Need extra space if node range is not aligned with 18862306a36Sopenharmony_ci * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm 18962306a36Sopenharmony_ci * checks buddy's status, range could be out of exact node range. 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ci if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) || 19262306a36Sopenharmony_ci !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) 19362306a36Sopenharmony_ci nr_pages += MAX_ORDER_NR_PAGES; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci table_size = page_ext_size * nr_pages; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci base = memblock_alloc_try_nid( 19862306a36Sopenharmony_ci table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), 19962306a36Sopenharmony_ci MEMBLOCK_ALLOC_ACCESSIBLE, nid); 20062306a36Sopenharmony_ci if (!base) 20162306a36Sopenharmony_ci return -ENOMEM; 20262306a36Sopenharmony_ci NODE_DATA(nid)->node_page_ext = base; 20362306a36Sopenharmony_ci total_usage += table_size; 20462306a36Sopenharmony_ci return 0; 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_civoid __init page_ext_init_flatmem(void) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci int nid, fail; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci if (!invoke_need_callbacks()) 21362306a36Sopenharmony_ci return; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci for_each_online_node(nid) { 21662306a36Sopenharmony_ci fail = alloc_node_page_ext(nid); 21762306a36Sopenharmony_ci if (fail) 21862306a36Sopenharmony_ci goto fail; 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci pr_info("allocated %ld bytes of page_ext\n", total_usage); 22162306a36Sopenharmony_ci return; 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cifail: 22462306a36Sopenharmony_ci pr_crit("allocation of page_ext failed.\n"); 22562306a36Sopenharmony_ci panic("Out of memory"); 22662306a36Sopenharmony_ci} 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci#else /* CONFIG_SPARSEMEM */ 22962306a36Sopenharmony_cistatic bool page_ext_invalid(struct page_ext *page_ext) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID); 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_cistatic struct page_ext *lookup_page_ext(const struct page *page) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci unsigned long pfn = page_to_pfn(page); 23762306a36Sopenharmony_ci struct mem_section *section = __pfn_to_section(pfn); 23862306a36Sopenharmony_ci struct page_ext *page_ext = READ_ONCE(section->page_ext); 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci WARN_ON_ONCE(!rcu_read_lock_held()); 24162306a36Sopenharmony_ci /* 24262306a36Sopenharmony_ci * The sanity checks the page allocator does upon freeing a 24362306a36Sopenharmony_ci * page can reach here before the page_ext arrays are 24462306a36Sopenharmony_ci * allocated when feeding a range of pages to the allocator 24562306a36Sopenharmony_ci * for the first time during bootup or memory hotplug. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_ci if (page_ext_invalid(page_ext)) 24862306a36Sopenharmony_ci return NULL; 24962306a36Sopenharmony_ci return get_entry(page_ext, pfn); 25062306a36Sopenharmony_ci} 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_cistatic void *__meminit alloc_page_ext(size_t size, int nid) 25362306a36Sopenharmony_ci{ 25462306a36Sopenharmony_ci gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; 25562306a36Sopenharmony_ci void *addr = NULL; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci addr = alloc_pages_exact_nid(nid, size, flags); 25862306a36Sopenharmony_ci if (addr) { 25962306a36Sopenharmony_ci kmemleak_alloc(addr, size, 1, flags); 26062306a36Sopenharmony_ci return addr; 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci addr = vzalloc_node(size, nid); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci return addr; 26662306a36Sopenharmony_ci} 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_cistatic int __meminit init_section_page_ext(unsigned long pfn, int nid) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci struct mem_section *section; 27162306a36Sopenharmony_ci struct page_ext *base; 27262306a36Sopenharmony_ci unsigned long table_size; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci section = __pfn_to_section(pfn); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci if (section->page_ext) 27762306a36Sopenharmony_ci return 0; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci table_size = page_ext_size * PAGES_PER_SECTION; 28062306a36Sopenharmony_ci base = alloc_page_ext(table_size, nid); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci /* 28362306a36Sopenharmony_ci * The value stored in section->page_ext is (base - pfn) 28462306a36Sopenharmony_ci * and it does not point to the memory block allocated above, 28562306a36Sopenharmony_ci * causing kmemleak false positives. 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_ci kmemleak_not_leak(base); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (!base) { 29062306a36Sopenharmony_ci pr_err("page ext allocation failure\n"); 29162306a36Sopenharmony_ci return -ENOMEM; 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci /* 29562306a36Sopenharmony_ci * The passed "pfn" may not be aligned to SECTION. For the calculation 29662306a36Sopenharmony_ci * we need to apply a mask. 29762306a36Sopenharmony_ci */ 29862306a36Sopenharmony_ci pfn &= PAGE_SECTION_MASK; 29962306a36Sopenharmony_ci section->page_ext = (void *)base - page_ext_size * pfn; 30062306a36Sopenharmony_ci total_usage += table_size; 30162306a36Sopenharmony_ci return 0; 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic void free_page_ext(void *addr) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci if (is_vmalloc_addr(addr)) { 30762306a36Sopenharmony_ci vfree(addr); 30862306a36Sopenharmony_ci } else { 30962306a36Sopenharmony_ci struct page *page = virt_to_page(addr); 31062306a36Sopenharmony_ci size_t table_size; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci table_size = page_ext_size * PAGES_PER_SECTION; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci BUG_ON(PageReserved(page)); 31562306a36Sopenharmony_ci kmemleak_free(addr); 31662306a36Sopenharmony_ci free_pages_exact(addr, table_size); 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_cistatic void __free_page_ext(unsigned long pfn) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct mem_section *ms; 32362306a36Sopenharmony_ci struct page_ext *base; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci ms = __pfn_to_section(pfn); 32662306a36Sopenharmony_ci if (!ms || !ms->page_ext) 32762306a36Sopenharmony_ci return; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci base = READ_ONCE(ms->page_ext); 33062306a36Sopenharmony_ci /* 33162306a36Sopenharmony_ci * page_ext here can be valid while doing the roll back 33262306a36Sopenharmony_ci * operation in online_page_ext(). 33362306a36Sopenharmony_ci */ 33462306a36Sopenharmony_ci if (page_ext_invalid(base)) 33562306a36Sopenharmony_ci base = (void *)base - PAGE_EXT_INVALID; 33662306a36Sopenharmony_ci WRITE_ONCE(ms->page_ext, NULL); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci base = get_entry(base, pfn); 33962306a36Sopenharmony_ci free_page_ext(base); 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_cistatic void __invalidate_page_ext(unsigned long pfn) 34362306a36Sopenharmony_ci{ 34462306a36Sopenharmony_ci struct mem_section *ms; 34562306a36Sopenharmony_ci void *val; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci ms = __pfn_to_section(pfn); 34862306a36Sopenharmony_ci if (!ms || !ms->page_ext) 34962306a36Sopenharmony_ci return; 35062306a36Sopenharmony_ci val = (void *)ms->page_ext + PAGE_EXT_INVALID; 35162306a36Sopenharmony_ci WRITE_ONCE(ms->page_ext, val); 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_cistatic int __meminit online_page_ext(unsigned long start_pfn, 35562306a36Sopenharmony_ci unsigned long nr_pages, 35662306a36Sopenharmony_ci int nid) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci unsigned long start, end, pfn; 35962306a36Sopenharmony_ci int fail = 0; 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci start = SECTION_ALIGN_DOWN(start_pfn); 36262306a36Sopenharmony_ci end = SECTION_ALIGN_UP(start_pfn + nr_pages); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (nid == NUMA_NO_NODE) { 36562306a36Sopenharmony_ci /* 36662306a36Sopenharmony_ci * In this case, "nid" already exists and contains valid memory. 36762306a36Sopenharmony_ci * "start_pfn" passed to us is a pfn which is an arg for 36862306a36Sopenharmony_ci * online__pages(), and start_pfn should exist. 36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_ci nid = pfn_to_nid(start_pfn); 37162306a36Sopenharmony_ci VM_BUG_ON(!node_online(nid)); 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) 37562306a36Sopenharmony_ci fail = init_section_page_ext(pfn, nid); 37662306a36Sopenharmony_ci if (!fail) 37762306a36Sopenharmony_ci return 0; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci /* rollback */ 38062306a36Sopenharmony_ci end = pfn - PAGES_PER_SECTION; 38162306a36Sopenharmony_ci for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 38262306a36Sopenharmony_ci __free_page_ext(pfn); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci return -ENOMEM; 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_cistatic void __meminit offline_page_ext(unsigned long start_pfn, 38862306a36Sopenharmony_ci unsigned long nr_pages) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci unsigned long start, end, pfn; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci start = SECTION_ALIGN_DOWN(start_pfn); 39362306a36Sopenharmony_ci end = SECTION_ALIGN_UP(start_pfn + nr_pages); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci /* 39662306a36Sopenharmony_ci * Freeing of page_ext is done in 3 steps to avoid 39762306a36Sopenharmony_ci * use-after-free of it: 39862306a36Sopenharmony_ci * 1) Traverse all the sections and mark their page_ext 39962306a36Sopenharmony_ci * as invalid. 40062306a36Sopenharmony_ci * 2) Wait for all the existing users of page_ext who 40162306a36Sopenharmony_ci * started before invalidation to finish. 40262306a36Sopenharmony_ci * 3) Free the page_ext. 40362306a36Sopenharmony_ci */ 40462306a36Sopenharmony_ci for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 40562306a36Sopenharmony_ci __invalidate_page_ext(pfn); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci synchronize_rcu(); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 41062306a36Sopenharmony_ci __free_page_ext(pfn); 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_cistatic int __meminit page_ext_callback(struct notifier_block *self, 41462306a36Sopenharmony_ci unsigned long action, void *arg) 41562306a36Sopenharmony_ci{ 41662306a36Sopenharmony_ci struct memory_notify *mn = arg; 41762306a36Sopenharmony_ci int ret = 0; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci switch (action) { 42062306a36Sopenharmony_ci case MEM_GOING_ONLINE: 42162306a36Sopenharmony_ci ret = online_page_ext(mn->start_pfn, 42262306a36Sopenharmony_ci mn->nr_pages, mn->status_change_nid); 42362306a36Sopenharmony_ci break; 42462306a36Sopenharmony_ci case MEM_OFFLINE: 42562306a36Sopenharmony_ci offline_page_ext(mn->start_pfn, 42662306a36Sopenharmony_ci mn->nr_pages); 42762306a36Sopenharmony_ci break; 42862306a36Sopenharmony_ci case MEM_CANCEL_ONLINE: 42962306a36Sopenharmony_ci offline_page_ext(mn->start_pfn, 43062306a36Sopenharmony_ci mn->nr_pages); 43162306a36Sopenharmony_ci break; 43262306a36Sopenharmony_ci case MEM_GOING_OFFLINE: 43362306a36Sopenharmony_ci break; 43462306a36Sopenharmony_ci case MEM_ONLINE: 43562306a36Sopenharmony_ci case MEM_CANCEL_OFFLINE: 43662306a36Sopenharmony_ci break; 43762306a36Sopenharmony_ci } 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci return notifier_from_errno(ret); 44062306a36Sopenharmony_ci} 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_civoid __init page_ext_init(void) 44362306a36Sopenharmony_ci{ 44462306a36Sopenharmony_ci unsigned long pfn; 44562306a36Sopenharmony_ci int nid; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci if (!invoke_need_callbacks()) 44862306a36Sopenharmony_ci return; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci for_each_node_state(nid, N_MEMORY) { 45162306a36Sopenharmony_ci unsigned long start_pfn, end_pfn; 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci start_pfn = node_start_pfn(nid); 45462306a36Sopenharmony_ci end_pfn = node_end_pfn(nid); 45562306a36Sopenharmony_ci /* 45662306a36Sopenharmony_ci * start_pfn and end_pfn may not be aligned to SECTION and the 45762306a36Sopenharmony_ci * page->flags of out of node pages are not initialized. So we 45862306a36Sopenharmony_ci * scan [start_pfn, the biggest section's pfn < end_pfn) here. 45962306a36Sopenharmony_ci */ 46062306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; 46162306a36Sopenharmony_ci pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci if (!pfn_valid(pfn)) 46462306a36Sopenharmony_ci continue; 46562306a36Sopenharmony_ci /* 46662306a36Sopenharmony_ci * Nodes's pfns can be overlapping. 46762306a36Sopenharmony_ci * We know some arch can have a nodes layout such as 46862306a36Sopenharmony_ci * -------------pfn--------------> 46962306a36Sopenharmony_ci * N0 | N1 | N2 | N0 | N1 | N2|.... 47062306a36Sopenharmony_ci */ 47162306a36Sopenharmony_ci if (pfn_to_nid(pfn) != nid) 47262306a36Sopenharmony_ci continue; 47362306a36Sopenharmony_ci if (init_section_page_ext(pfn, nid)) 47462306a36Sopenharmony_ci goto oom; 47562306a36Sopenharmony_ci cond_resched(); 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI); 47962306a36Sopenharmony_ci pr_info("allocated %ld bytes of page_ext\n", total_usage); 48062306a36Sopenharmony_ci invoke_init_callbacks(); 48162306a36Sopenharmony_ci return; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cioom: 48462306a36Sopenharmony_ci panic("Out of memory"); 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_civoid __meminit pgdat_page_ext_init(struct pglist_data *pgdat) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci} 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci#endif 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci/** 49462306a36Sopenharmony_ci * page_ext_get() - Get the extended information for a page. 49562306a36Sopenharmony_ci * @page: The page we're interested in. 49662306a36Sopenharmony_ci * 49762306a36Sopenharmony_ci * Ensures that the page_ext will remain valid until page_ext_put() 49862306a36Sopenharmony_ci * is called. 49962306a36Sopenharmony_ci * 50062306a36Sopenharmony_ci * Return: NULL if no page_ext exists for this page. 50162306a36Sopenharmony_ci * Context: Any context. Caller may not sleep until they have called 50262306a36Sopenharmony_ci * page_ext_put(). 50362306a36Sopenharmony_ci */ 50462306a36Sopenharmony_cistruct page_ext *page_ext_get(struct page *page) 50562306a36Sopenharmony_ci{ 50662306a36Sopenharmony_ci struct page_ext *page_ext; 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci rcu_read_lock(); 50962306a36Sopenharmony_ci page_ext = lookup_page_ext(page); 51062306a36Sopenharmony_ci if (!page_ext) { 51162306a36Sopenharmony_ci rcu_read_unlock(); 51262306a36Sopenharmony_ci return NULL; 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci return page_ext; 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci/** 51962306a36Sopenharmony_ci * page_ext_put() - Working with page extended information is done. 52062306a36Sopenharmony_ci * @page_ext: Page extended information received from page_ext_get(). 52162306a36Sopenharmony_ci * 52262306a36Sopenharmony_ci * The page extended information of the page may not be valid after this 52362306a36Sopenharmony_ci * function is called. 52462306a36Sopenharmony_ci * 52562306a36Sopenharmony_ci * Return: None. 52662306a36Sopenharmony_ci * Context: Any context with corresponding page_ext_get() is called. 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_civoid page_ext_put(struct page_ext *page_ext) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci if (unlikely(!page_ext)) 53162306a36Sopenharmony_ci return; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci rcu_read_unlock(); 53462306a36Sopenharmony_ci} 535