162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/mm/page_isolation.c 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/mm.h> 762306a36Sopenharmony_ci#include <linux/page-isolation.h> 862306a36Sopenharmony_ci#include <linux/pageblock-flags.h> 962306a36Sopenharmony_ci#include <linux/memory.h> 1062306a36Sopenharmony_ci#include <linux/hugetlb.h> 1162306a36Sopenharmony_ci#include <linux/page_owner.h> 1262306a36Sopenharmony_ci#include <linux/migrate.h> 1362306a36Sopenharmony_ci#include "internal.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#define CREATE_TRACE_POINTS 1662306a36Sopenharmony_ci#include <trace/events/page_isolation.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* 1962306a36Sopenharmony_ci * This function checks whether the range [start_pfn, end_pfn) includes 2062306a36Sopenharmony_ci * unmovable pages or not. The range must fall into a single pageblock and 2162306a36Sopenharmony_ci * consequently belong to a single zone. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * PageLRU check without isolation or lru_lock could race so that 2462306a36Sopenharmony_ci * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable 2562306a36Sopenharmony_ci * check without lock_page also may miss some movable non-lru pages at 2662306a36Sopenharmony_ci * race condition. So you can't expect this function should be exact. 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * Returns a page without holding a reference. If the caller wants to 2962306a36Sopenharmony_ci * dereference that page (e.g., dumping), it has to make sure that it 3062306a36Sopenharmony_ci * cannot get removed (e.g., via memory unplug) concurrently. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_cistatic struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long end_pfn, 3462306a36Sopenharmony_ci int migratetype, int flags) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci struct page *page = pfn_to_page(start_pfn); 3762306a36Sopenharmony_ci struct zone *zone = page_zone(page); 3862306a36Sopenharmony_ci unsigned long pfn; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci VM_BUG_ON(pageblock_start_pfn(start_pfn) != 4162306a36Sopenharmony_ci pageblock_start_pfn(end_pfn - 1)); 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci if (is_migrate_cma_page(page)) { 4462306a36Sopenharmony_ci /* 4562306a36Sopenharmony_ci * CMA allocations (alloc_contig_range) really need to mark 4662306a36Sopenharmony_ci * isolate CMA pageblocks even when they are not movable in fact 4762306a36Sopenharmony_ci * so consider them movable here. 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_ci if (is_migrate_cma(migratetype)) 5062306a36Sopenharmony_ci return NULL; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci return page; 5362306a36Sopenharmony_ci } 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn++) { 5662306a36Sopenharmony_ci page = pfn_to_page(pfn); 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci /* 5962306a36Sopenharmony_ci * Both, bootmem allocations and memory holes are marked 6062306a36Sopenharmony_ci * PG_reserved and are unmovable. We can even have unmovable 6162306a36Sopenharmony_ci * allocations inside ZONE_MOVABLE, for example when 6262306a36Sopenharmony_ci * specifying "movablecore". 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_ci if (PageReserved(page)) 6562306a36Sopenharmony_ci return page; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci /* 6862306a36Sopenharmony_ci * If the zone is movable and we have ruled out all reserved 6962306a36Sopenharmony_ci * pages then it should be reasonably safe to assume the rest 7062306a36Sopenharmony_ci * is movable. 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci if (zone_idx(zone) == ZONE_MOVABLE) 7362306a36Sopenharmony_ci continue; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci /* 7662306a36Sopenharmony_ci * Hugepages are not in LRU lists, but they're movable. 7762306a36Sopenharmony_ci * THPs are on the LRU, but need to be counted as #small pages. 7862306a36Sopenharmony_ci * We need not scan over tail pages because we don't 7962306a36Sopenharmony_ci * handle each tail page individually in migration. 8062306a36Sopenharmony_ci */ 8162306a36Sopenharmony_ci if (PageHuge(page) || PageTransCompound(page)) { 8262306a36Sopenharmony_ci struct folio *folio = page_folio(page); 8362306a36Sopenharmony_ci unsigned int skip_pages; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci if (PageHuge(page)) { 8662306a36Sopenharmony_ci if (!hugepage_migration_supported(folio_hstate(folio))) 8762306a36Sopenharmony_ci return page; 8862306a36Sopenharmony_ci } else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) { 8962306a36Sopenharmony_ci return page; 9062306a36Sopenharmony_ci } 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page); 9362306a36Sopenharmony_ci pfn += skip_pages - 1; 9462306a36Sopenharmony_ci continue; 9562306a36Sopenharmony_ci } 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci /* 9862306a36Sopenharmony_ci * We can't use page_count without pin a page 9962306a36Sopenharmony_ci * because another CPU can free compound page. 10062306a36Sopenharmony_ci * This check already skips compound tails of THP 10162306a36Sopenharmony_ci * because their page->_refcount is zero at all time. 10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_ci if (!page_ref_count(page)) { 10462306a36Sopenharmony_ci if (PageBuddy(page)) 10562306a36Sopenharmony_ci pfn += (1 << buddy_order(page)) - 1; 10662306a36Sopenharmony_ci continue; 10762306a36Sopenharmony_ci } 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci /* 11062306a36Sopenharmony_ci * The HWPoisoned page may be not in buddy system, and 11162306a36Sopenharmony_ci * page_count() is not 0. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_ci if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) 11462306a36Sopenharmony_ci continue; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci /* 11762306a36Sopenharmony_ci * We treat all PageOffline() pages as movable when offlining 11862306a36Sopenharmony_ci * to give drivers a chance to decrement their reference count 11962306a36Sopenharmony_ci * in MEM_GOING_OFFLINE in order to indicate that these pages 12062306a36Sopenharmony_ci * can be offlined as there are no direct references anymore. 12162306a36Sopenharmony_ci * For actually unmovable PageOffline() where the driver does 12262306a36Sopenharmony_ci * not support this, we will fail later when trying to actually 12362306a36Sopenharmony_ci * move these pages that still have a reference count > 0. 12462306a36Sopenharmony_ci * (false negatives in this function only) 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_ci if ((flags & MEMORY_OFFLINE) && PageOffline(page)) 12762306a36Sopenharmony_ci continue; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (__PageMovable(page) || PageLRU(page)) 13062306a36Sopenharmony_ci continue; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci /* 13362306a36Sopenharmony_ci * If there are RECLAIMABLE pages, we need to check 13462306a36Sopenharmony_ci * it. But now, memory offline itself doesn't call 13562306a36Sopenharmony_ci * shrink_node_slabs() and it still to be fixed. 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_ci return page; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci return NULL; 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci/* 14362306a36Sopenharmony_ci * This function set pageblock migratetype to isolate if no unmovable page is 14462306a36Sopenharmony_ci * present in [start_pfn, end_pfn). The pageblock must intersect with 14562306a36Sopenharmony_ci * [start_pfn, end_pfn). 14662306a36Sopenharmony_ci */ 14762306a36Sopenharmony_cistatic int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags, 14862306a36Sopenharmony_ci unsigned long start_pfn, unsigned long end_pfn) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci struct zone *zone = page_zone(page); 15162306a36Sopenharmony_ci struct page *unmovable; 15262306a36Sopenharmony_ci unsigned long flags; 15362306a36Sopenharmony_ci unsigned long check_unmovable_start, check_unmovable_end; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci spin_lock_irqsave(&zone->lock, flags); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* 15862306a36Sopenharmony_ci * We assume the caller intended to SET migrate type to isolate. 15962306a36Sopenharmony_ci * If it is already set, then someone else must have raced and 16062306a36Sopenharmony_ci * set it before us. 16162306a36Sopenharmony_ci */ 16262306a36Sopenharmony_ci if (is_migrate_isolate_page(page)) { 16362306a36Sopenharmony_ci spin_unlock_irqrestore(&zone->lock, flags); 16462306a36Sopenharmony_ci return -EBUSY; 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci /* 16862306a36Sopenharmony_ci * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. 16962306a36Sopenharmony_ci * We just check MOVABLE pages. 17062306a36Sopenharmony_ci * 17162306a36Sopenharmony_ci * Pass the intersection of [start_pfn, end_pfn) and the page's pageblock 17262306a36Sopenharmony_ci * to avoid redundant checks. 17362306a36Sopenharmony_ci */ 17462306a36Sopenharmony_ci check_unmovable_start = max(page_to_pfn(page), start_pfn); 17562306a36Sopenharmony_ci check_unmovable_end = min(pageblock_end_pfn(page_to_pfn(page)), 17662306a36Sopenharmony_ci end_pfn); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end, 17962306a36Sopenharmony_ci migratetype, isol_flags); 18062306a36Sopenharmony_ci if (!unmovable) { 18162306a36Sopenharmony_ci unsigned long nr_pages; 18262306a36Sopenharmony_ci int mt = get_pageblock_migratetype(page); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci set_pageblock_migratetype(page, MIGRATE_ISOLATE); 18562306a36Sopenharmony_ci zone->nr_isolate_pageblock++; 18662306a36Sopenharmony_ci nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, 18762306a36Sopenharmony_ci NULL); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci __mod_zone_freepage_state(zone, -nr_pages, mt); 19062306a36Sopenharmony_ci spin_unlock_irqrestore(&zone->lock, flags); 19162306a36Sopenharmony_ci return 0; 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci spin_unlock_irqrestore(&zone->lock, flags); 19562306a36Sopenharmony_ci if (isol_flags & REPORT_FAILURE) { 19662306a36Sopenharmony_ci /* 19762306a36Sopenharmony_ci * printk() with zone->lock held will likely trigger a 19862306a36Sopenharmony_ci * lockdep splat, so defer it here. 19962306a36Sopenharmony_ci */ 20062306a36Sopenharmony_ci dump_page(unmovable, "unmovable page"); 20162306a36Sopenharmony_ci } 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci return -EBUSY; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic void unset_migratetype_isolate(struct page *page, int migratetype) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci struct zone *zone; 20962306a36Sopenharmony_ci unsigned long flags, nr_pages; 21062306a36Sopenharmony_ci bool isolated_page = false; 21162306a36Sopenharmony_ci unsigned int order; 21262306a36Sopenharmony_ci struct page *buddy; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci zone = page_zone(page); 21562306a36Sopenharmony_ci spin_lock_irqsave(&zone->lock, flags); 21662306a36Sopenharmony_ci if (!is_migrate_isolate_page(page)) 21762306a36Sopenharmony_ci goto out; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci /* 22062306a36Sopenharmony_ci * Because freepage with more than pageblock_order on isolated 22162306a36Sopenharmony_ci * pageblock is restricted to merge due to freepage counting problem, 22262306a36Sopenharmony_ci * it is possible that there is free buddy page. 22362306a36Sopenharmony_ci * move_freepages_block() doesn't care of merge so we need other 22462306a36Sopenharmony_ci * approach in order to merge them. Isolation and free will make 22562306a36Sopenharmony_ci * these pages to be merged. 22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_ci if (PageBuddy(page)) { 22862306a36Sopenharmony_ci order = buddy_order(page); 22962306a36Sopenharmony_ci if (order >= pageblock_order && order < MAX_ORDER) { 23062306a36Sopenharmony_ci buddy = find_buddy_page_pfn(page, page_to_pfn(page), 23162306a36Sopenharmony_ci order, NULL); 23262306a36Sopenharmony_ci if (buddy && !is_migrate_isolate_page(buddy)) { 23362306a36Sopenharmony_ci isolated_page = !!__isolate_free_page(page, order); 23462306a36Sopenharmony_ci /* 23562306a36Sopenharmony_ci * Isolating a free page in an isolated pageblock 23662306a36Sopenharmony_ci * is expected to always work as watermarks don't 23762306a36Sopenharmony_ci * apply here. 23862306a36Sopenharmony_ci */ 23962306a36Sopenharmony_ci VM_WARN_ON(!isolated_page); 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci /* 24562306a36Sopenharmony_ci * If we isolate freepage with more than pageblock_order, there 24662306a36Sopenharmony_ci * should be no freepage in the range, so we could avoid costly 24762306a36Sopenharmony_ci * pageblock scanning for freepage moving. 24862306a36Sopenharmony_ci * 24962306a36Sopenharmony_ci * We didn't actually touch any of the isolated pages, so place them 25062306a36Sopenharmony_ci * to the tail of the freelist. This is an optimization for memory 25162306a36Sopenharmony_ci * onlining - just onlined memory won't immediately be considered for 25262306a36Sopenharmony_ci * allocation. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_ci if (!isolated_page) { 25562306a36Sopenharmony_ci nr_pages = move_freepages_block(zone, page, migratetype, NULL); 25662306a36Sopenharmony_ci __mod_zone_freepage_state(zone, nr_pages, migratetype); 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci set_pageblock_migratetype(page, migratetype); 25962306a36Sopenharmony_ci if (isolated_page) 26062306a36Sopenharmony_ci __putback_isolated_page(page, order, migratetype); 26162306a36Sopenharmony_ci zone->nr_isolate_pageblock--; 26262306a36Sopenharmony_ciout: 26362306a36Sopenharmony_ci spin_unlock_irqrestore(&zone->lock, flags); 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cistatic inline struct page * 26762306a36Sopenharmony_ci__first_valid_page(unsigned long pfn, unsigned long nr_pages) 26862306a36Sopenharmony_ci{ 26962306a36Sopenharmony_ci int i; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 27262306a36Sopenharmony_ci struct page *page; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci page = pfn_to_online_page(pfn + i); 27562306a36Sopenharmony_ci if (!page) 27662306a36Sopenharmony_ci continue; 27762306a36Sopenharmony_ci return page; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci return NULL; 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci/** 28362306a36Sopenharmony_ci * isolate_single_pageblock() -- tries to isolate a pageblock that might be 28462306a36Sopenharmony_ci * within a free or in-use page. 28562306a36Sopenharmony_ci * @boundary_pfn: pageblock-aligned pfn that a page might cross 28662306a36Sopenharmony_ci * @flags: isolation flags 28762306a36Sopenharmony_ci * @gfp_flags: GFP flags used for migrating pages 28862306a36Sopenharmony_ci * @isolate_before: isolate the pageblock before the boundary_pfn 28962306a36Sopenharmony_ci * @skip_isolation: the flag to skip the pageblock isolation in second 29062306a36Sopenharmony_ci * isolate_single_pageblock() 29162306a36Sopenharmony_ci * @migratetype: migrate type to set in error recovery. 29262306a36Sopenharmony_ci * 29362306a36Sopenharmony_ci * Free and in-use pages can be as big as MAX_ORDER and contain more than one 29462306a36Sopenharmony_ci * pageblock. When not all pageblocks within a page are isolated at the same 29562306a36Sopenharmony_ci * time, free page accounting can go wrong. For example, in the case of 29662306a36Sopenharmony_ci * MAX_ORDER = pageblock_order + 1, a MAX_ORDER page has two pagelbocks. 29762306a36Sopenharmony_ci * [ MAX_ORDER ] 29862306a36Sopenharmony_ci * [ pageblock0 | pageblock1 ] 29962306a36Sopenharmony_ci * When either pageblock is isolated, if it is a free page, the page is not 30062306a36Sopenharmony_ci * split into separate migratetype lists, which is supposed to; if it is an 30162306a36Sopenharmony_ci * in-use page and freed later, __free_one_page() does not split the free page 30262306a36Sopenharmony_ci * either. The function handles this by splitting the free page or migrating 30362306a36Sopenharmony_ci * the in-use page then splitting the free page. 30462306a36Sopenharmony_ci */ 30562306a36Sopenharmony_cistatic int isolate_single_pageblock(unsigned long boundary_pfn, int flags, 30662306a36Sopenharmony_ci gfp_t gfp_flags, bool isolate_before, bool skip_isolation, 30762306a36Sopenharmony_ci int migratetype) 30862306a36Sopenharmony_ci{ 30962306a36Sopenharmony_ci unsigned long start_pfn; 31062306a36Sopenharmony_ci unsigned long isolate_pageblock; 31162306a36Sopenharmony_ci unsigned long pfn; 31262306a36Sopenharmony_ci struct zone *zone; 31362306a36Sopenharmony_ci int ret; 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci VM_BUG_ON(!pageblock_aligned(boundary_pfn)); 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci if (isolate_before) 31862306a36Sopenharmony_ci isolate_pageblock = boundary_pfn - pageblock_nr_pages; 31962306a36Sopenharmony_ci else 32062306a36Sopenharmony_ci isolate_pageblock = boundary_pfn; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci /* 32362306a36Sopenharmony_ci * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid 32462306a36Sopenharmony_ci * only isolating a subset of pageblocks from a bigger than pageblock 32562306a36Sopenharmony_ci * free or in-use page. Also make sure all to-be-isolated pageblocks 32662306a36Sopenharmony_ci * are within the same zone. 32762306a36Sopenharmony_ci */ 32862306a36Sopenharmony_ci zone = page_zone(pfn_to_page(isolate_pageblock)); 32962306a36Sopenharmony_ci start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), 33062306a36Sopenharmony_ci zone->zone_start_pfn); 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci if (skip_isolation) { 33362306a36Sopenharmony_ci int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci VM_BUG_ON(!is_migrate_isolate(mt)); 33662306a36Sopenharmony_ci } else { 33762306a36Sopenharmony_ci ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype, 33862306a36Sopenharmony_ci flags, isolate_pageblock, isolate_pageblock + pageblock_nr_pages); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci if (ret) 34162306a36Sopenharmony_ci return ret; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci /* 34562306a36Sopenharmony_ci * Bail out early when the to-be-isolated pageblock does not form 34662306a36Sopenharmony_ci * a free or in-use page across boundary_pfn: 34762306a36Sopenharmony_ci * 34862306a36Sopenharmony_ci * 1. isolate before boundary_pfn: the page after is not online 34962306a36Sopenharmony_ci * 2. isolate after boundary_pfn: the page before is not online 35062306a36Sopenharmony_ci * 35162306a36Sopenharmony_ci * This also ensures correctness. Without it, when isolate after 35262306a36Sopenharmony_ci * boundary_pfn and [start_pfn, boundary_pfn) are not online, 35362306a36Sopenharmony_ci * __first_valid_page() will return unexpected NULL in the for loop 35462306a36Sopenharmony_ci * below. 35562306a36Sopenharmony_ci */ 35662306a36Sopenharmony_ci if (isolate_before) { 35762306a36Sopenharmony_ci if (!pfn_to_online_page(boundary_pfn)) 35862306a36Sopenharmony_ci return 0; 35962306a36Sopenharmony_ci } else { 36062306a36Sopenharmony_ci if (!pfn_to_online_page(boundary_pfn - 1)) 36162306a36Sopenharmony_ci return 0; 36262306a36Sopenharmony_ci } 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci for (pfn = start_pfn; pfn < boundary_pfn;) { 36562306a36Sopenharmony_ci struct page *page = __first_valid_page(pfn, boundary_pfn - pfn); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci VM_BUG_ON(!page); 36862306a36Sopenharmony_ci pfn = page_to_pfn(page); 36962306a36Sopenharmony_ci /* 37062306a36Sopenharmony_ci * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any 37162306a36Sopenharmony_ci * free pages in [start_pfn, boundary_pfn), its head page will 37262306a36Sopenharmony_ci * always be in the range. 37362306a36Sopenharmony_ci */ 37462306a36Sopenharmony_ci if (PageBuddy(page)) { 37562306a36Sopenharmony_ci int order = buddy_order(page); 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci if (pfn + (1UL << order) > boundary_pfn) { 37862306a36Sopenharmony_ci /* free page changed before split, check it again */ 37962306a36Sopenharmony_ci if (split_free_page(page, order, boundary_pfn - pfn)) 38062306a36Sopenharmony_ci continue; 38162306a36Sopenharmony_ci } 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci pfn += 1UL << order; 38462306a36Sopenharmony_ci continue; 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci /* 38762306a36Sopenharmony_ci * migrate compound pages then let the free page handling code 38862306a36Sopenharmony_ci * above do the rest. If migration is not possible, just fail. 38962306a36Sopenharmony_ci */ 39062306a36Sopenharmony_ci if (PageCompound(page)) { 39162306a36Sopenharmony_ci struct page *head = compound_head(page); 39262306a36Sopenharmony_ci unsigned long head_pfn = page_to_pfn(head); 39362306a36Sopenharmony_ci unsigned long nr_pages = compound_nr(head); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (head_pfn + nr_pages <= boundary_pfn) { 39662306a36Sopenharmony_ci pfn = head_pfn + nr_pages; 39762306a36Sopenharmony_ci continue; 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci#if defined CONFIG_COMPACTION || defined CONFIG_CMA 40062306a36Sopenharmony_ci /* 40162306a36Sopenharmony_ci * hugetlb, lru compound (THP), and movable compound pages 40262306a36Sopenharmony_ci * can be migrated. Otherwise, fail the isolation. 40362306a36Sopenharmony_ci */ 40462306a36Sopenharmony_ci if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { 40562306a36Sopenharmony_ci int order; 40662306a36Sopenharmony_ci unsigned long outer_pfn; 40762306a36Sopenharmony_ci int page_mt = get_pageblock_migratetype(page); 40862306a36Sopenharmony_ci bool isolate_page = !is_migrate_isolate_page(page); 40962306a36Sopenharmony_ci struct compact_control cc = { 41062306a36Sopenharmony_ci .nr_migratepages = 0, 41162306a36Sopenharmony_ci .order = -1, 41262306a36Sopenharmony_ci .zone = page_zone(pfn_to_page(head_pfn)), 41362306a36Sopenharmony_ci .mode = MIGRATE_SYNC, 41462306a36Sopenharmony_ci .ignore_skip_hint = true, 41562306a36Sopenharmony_ci .no_set_skip_hint = true, 41662306a36Sopenharmony_ci .gfp_mask = gfp_flags, 41762306a36Sopenharmony_ci .alloc_contig = true, 41862306a36Sopenharmony_ci }; 41962306a36Sopenharmony_ci INIT_LIST_HEAD(&cc.migratepages); 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci /* 42262306a36Sopenharmony_ci * XXX: mark the page as MIGRATE_ISOLATE so that 42362306a36Sopenharmony_ci * no one else can grab the freed page after migration. 42462306a36Sopenharmony_ci * Ideally, the page should be freed as two separate 42562306a36Sopenharmony_ci * pages to be added into separate migratetype free 42662306a36Sopenharmony_ci * lists. 42762306a36Sopenharmony_ci */ 42862306a36Sopenharmony_ci if (isolate_page) { 42962306a36Sopenharmony_ci ret = set_migratetype_isolate(page, page_mt, 43062306a36Sopenharmony_ci flags, head_pfn, head_pfn + nr_pages); 43162306a36Sopenharmony_ci if (ret) 43262306a36Sopenharmony_ci goto failed; 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci ret = __alloc_contig_migrate_range(&cc, head_pfn, 43662306a36Sopenharmony_ci head_pfn + nr_pages); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci /* 43962306a36Sopenharmony_ci * restore the page's migratetype so that it can 44062306a36Sopenharmony_ci * be split into separate migratetype free lists 44162306a36Sopenharmony_ci * later. 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_ci if (isolate_page) 44462306a36Sopenharmony_ci unset_migratetype_isolate(page, page_mt); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci if (ret) 44762306a36Sopenharmony_ci goto failed; 44862306a36Sopenharmony_ci /* 44962306a36Sopenharmony_ci * reset pfn to the head of the free page, so 45062306a36Sopenharmony_ci * that the free page handling code above can split 45162306a36Sopenharmony_ci * the free page to the right migratetype list. 45262306a36Sopenharmony_ci * 45362306a36Sopenharmony_ci * head_pfn is not used here as a hugetlb page order 45462306a36Sopenharmony_ci * can be bigger than MAX_ORDER, but after it is 45562306a36Sopenharmony_ci * freed, the free page order is not. Use pfn within 45662306a36Sopenharmony_ci * the range to find the head of the free page. 45762306a36Sopenharmony_ci */ 45862306a36Sopenharmony_ci order = 0; 45962306a36Sopenharmony_ci outer_pfn = pfn; 46062306a36Sopenharmony_ci while (!PageBuddy(pfn_to_page(outer_pfn))) { 46162306a36Sopenharmony_ci /* stop if we cannot find the free page */ 46262306a36Sopenharmony_ci if (++order > MAX_ORDER) 46362306a36Sopenharmony_ci goto failed; 46462306a36Sopenharmony_ci outer_pfn &= ~0UL << order; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci pfn = outer_pfn; 46762306a36Sopenharmony_ci continue; 46862306a36Sopenharmony_ci } else 46962306a36Sopenharmony_ci#endif 47062306a36Sopenharmony_ci goto failed; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci pfn++; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci return 0; 47662306a36Sopenharmony_cifailed: 47762306a36Sopenharmony_ci /* restore the original migratetype */ 47862306a36Sopenharmony_ci if (!skip_isolation) 47962306a36Sopenharmony_ci unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype); 48062306a36Sopenharmony_ci return -EBUSY; 48162306a36Sopenharmony_ci} 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci/** 48462306a36Sopenharmony_ci * start_isolate_page_range() - mark page range MIGRATE_ISOLATE 48562306a36Sopenharmony_ci * @start_pfn: The first PFN of the range to be isolated. 48662306a36Sopenharmony_ci * @end_pfn: The last PFN of the range to be isolated. 48762306a36Sopenharmony_ci * @migratetype: Migrate type to set in error recovery. 48862306a36Sopenharmony_ci * @flags: The following flags are allowed (they can be combined in 48962306a36Sopenharmony_ci * a bit mask) 49062306a36Sopenharmony_ci * MEMORY_OFFLINE - isolate to offline (!allocate) memory 49162306a36Sopenharmony_ci * e.g., skip over PageHWPoison() pages 49262306a36Sopenharmony_ci * and PageOffline() pages. 49362306a36Sopenharmony_ci * REPORT_FAILURE - report details about the failure to 49462306a36Sopenharmony_ci * isolate the range 49562306a36Sopenharmony_ci * @gfp_flags: GFP flags used for migrating pages that sit across the 49662306a36Sopenharmony_ci * range boundaries. 49762306a36Sopenharmony_ci * 49862306a36Sopenharmony_ci * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in 49962306a36Sopenharmony_ci * the range will never be allocated. Any free pages and pages freed in the 50062306a36Sopenharmony_ci * future will not be allocated again. If specified range includes migrate types 50162306a36Sopenharmony_ci * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all 50262306a36Sopenharmony_ci * pages in the range finally, the caller have to free all pages in the range. 50362306a36Sopenharmony_ci * test_page_isolated() can be used for test it. 50462306a36Sopenharmony_ci * 50562306a36Sopenharmony_ci * The function first tries to isolate the pageblocks at the beginning and end 50662306a36Sopenharmony_ci * of the range, since there might be pages across the range boundaries. 50762306a36Sopenharmony_ci * Afterwards, it isolates the rest of the range. 50862306a36Sopenharmony_ci * 50962306a36Sopenharmony_ci * There is no high level synchronization mechanism that prevents two threads 51062306a36Sopenharmony_ci * from trying to isolate overlapping ranges. If this happens, one thread 51162306a36Sopenharmony_ci * will notice pageblocks in the overlapping range already set to isolate. 51262306a36Sopenharmony_ci * This happens in set_migratetype_isolate, and set_migratetype_isolate 51362306a36Sopenharmony_ci * returns an error. We then clean up by restoring the migration type on 51462306a36Sopenharmony_ci * pageblocks we may have modified and return -EBUSY to caller. This 51562306a36Sopenharmony_ci * prevents two threads from simultaneously working on overlapping ranges. 51662306a36Sopenharmony_ci * 51762306a36Sopenharmony_ci * Please note that there is no strong synchronization with the page allocator 51862306a36Sopenharmony_ci * either. Pages might be freed while their page blocks are marked ISOLATED. 51962306a36Sopenharmony_ci * A call to drain_all_pages() after isolation can flush most of them. However 52062306a36Sopenharmony_ci * in some cases pages might still end up on pcp lists and that would allow 52162306a36Sopenharmony_ci * for their allocation even when they are in fact isolated already. Depending 52262306a36Sopenharmony_ci * on how strong of a guarantee the caller needs, zone_pcp_disable/enable() 52362306a36Sopenharmony_ci * might be used to flush and disable pcplist before isolation and enable after 52462306a36Sopenharmony_ci * unisolation. 52562306a36Sopenharmony_ci * 52662306a36Sopenharmony_ci * Return: 0 on success and -EBUSY if any part of range cannot be isolated. 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_ciint start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, 52962306a36Sopenharmony_ci int migratetype, int flags, gfp_t gfp_flags) 53062306a36Sopenharmony_ci{ 53162306a36Sopenharmony_ci unsigned long pfn; 53262306a36Sopenharmony_ci struct page *page; 53362306a36Sopenharmony_ci /* isolation is done at page block granularity */ 53462306a36Sopenharmony_ci unsigned long isolate_start = pageblock_start_pfn(start_pfn); 53562306a36Sopenharmony_ci unsigned long isolate_end = pageblock_align(end_pfn); 53662306a36Sopenharmony_ci int ret; 53762306a36Sopenharmony_ci bool skip_isolation = false; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ 54062306a36Sopenharmony_ci ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, 54162306a36Sopenharmony_ci skip_isolation, migratetype); 54262306a36Sopenharmony_ci if (ret) 54362306a36Sopenharmony_ci return ret; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci if (isolate_start == isolate_end - pageblock_nr_pages) 54662306a36Sopenharmony_ci skip_isolation = true; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ 54962306a36Sopenharmony_ci ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, 55062306a36Sopenharmony_ci skip_isolation, migratetype); 55162306a36Sopenharmony_ci if (ret) { 55262306a36Sopenharmony_ci unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); 55362306a36Sopenharmony_ci return ret; 55462306a36Sopenharmony_ci } 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci /* skip isolated pageblocks at the beginning and end */ 55762306a36Sopenharmony_ci for (pfn = isolate_start + pageblock_nr_pages; 55862306a36Sopenharmony_ci pfn < isolate_end - pageblock_nr_pages; 55962306a36Sopenharmony_ci pfn += pageblock_nr_pages) { 56062306a36Sopenharmony_ci page = __first_valid_page(pfn, pageblock_nr_pages); 56162306a36Sopenharmony_ci if (page && set_migratetype_isolate(page, migratetype, flags, 56262306a36Sopenharmony_ci start_pfn, end_pfn)) { 56362306a36Sopenharmony_ci undo_isolate_page_range(isolate_start, pfn, migratetype); 56462306a36Sopenharmony_ci unset_migratetype_isolate( 56562306a36Sopenharmony_ci pfn_to_page(isolate_end - pageblock_nr_pages), 56662306a36Sopenharmony_ci migratetype); 56762306a36Sopenharmony_ci return -EBUSY; 56862306a36Sopenharmony_ci } 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci return 0; 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci/** 57462306a36Sopenharmony_ci * undo_isolate_page_range - undo effects of start_isolate_page_range() 57562306a36Sopenharmony_ci * @start_pfn: The first PFN of the isolated range 57662306a36Sopenharmony_ci * @end_pfn: The last PFN of the isolated range 57762306a36Sopenharmony_ci * @migratetype: New migrate type to set on the range 57862306a36Sopenharmony_ci * 57962306a36Sopenharmony_ci * This finds every MIGRATE_ISOLATE page block in the given range 58062306a36Sopenharmony_ci * and switches it to @migratetype. 58162306a36Sopenharmony_ci */ 58262306a36Sopenharmony_civoid undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, 58362306a36Sopenharmony_ci int migratetype) 58462306a36Sopenharmony_ci{ 58562306a36Sopenharmony_ci unsigned long pfn; 58662306a36Sopenharmony_ci struct page *page; 58762306a36Sopenharmony_ci unsigned long isolate_start = pageblock_start_pfn(start_pfn); 58862306a36Sopenharmony_ci unsigned long isolate_end = pageblock_align(end_pfn); 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci for (pfn = isolate_start; 59162306a36Sopenharmony_ci pfn < isolate_end; 59262306a36Sopenharmony_ci pfn += pageblock_nr_pages) { 59362306a36Sopenharmony_ci page = __first_valid_page(pfn, pageblock_nr_pages); 59462306a36Sopenharmony_ci if (!page || !is_migrate_isolate_page(page)) 59562306a36Sopenharmony_ci continue; 59662306a36Sopenharmony_ci unset_migratetype_isolate(page, migratetype); 59762306a36Sopenharmony_ci } 59862306a36Sopenharmony_ci} 59962306a36Sopenharmony_ci/* 60062306a36Sopenharmony_ci * Test all pages in the range is free(means isolated) or not. 60162306a36Sopenharmony_ci * all pages in [start_pfn...end_pfn) must be in the same zone. 60262306a36Sopenharmony_ci * zone->lock must be held before call this. 60362306a36Sopenharmony_ci * 60462306a36Sopenharmony_ci * Returns the last tested pfn. 60562306a36Sopenharmony_ci */ 60662306a36Sopenharmony_cistatic unsigned long 60762306a36Sopenharmony_ci__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, 60862306a36Sopenharmony_ci int flags) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci struct page *page; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci while (pfn < end_pfn) { 61362306a36Sopenharmony_ci page = pfn_to_page(pfn); 61462306a36Sopenharmony_ci if (PageBuddy(page)) 61562306a36Sopenharmony_ci /* 61662306a36Sopenharmony_ci * If the page is on a free list, it has to be on 61762306a36Sopenharmony_ci * the correct MIGRATE_ISOLATE freelist. There is no 61862306a36Sopenharmony_ci * simple way to verify that as VM_BUG_ON(), though. 61962306a36Sopenharmony_ci */ 62062306a36Sopenharmony_ci pfn += 1 << buddy_order(page); 62162306a36Sopenharmony_ci else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) 62262306a36Sopenharmony_ci /* A HWPoisoned page cannot be also PageBuddy */ 62362306a36Sopenharmony_ci pfn++; 62462306a36Sopenharmony_ci else if ((flags & MEMORY_OFFLINE) && PageOffline(page) && 62562306a36Sopenharmony_ci !page_count(page)) 62662306a36Sopenharmony_ci /* 62762306a36Sopenharmony_ci * The responsible driver agreed to skip PageOffline() 62862306a36Sopenharmony_ci * pages when offlining memory by dropping its 62962306a36Sopenharmony_ci * reference in MEM_GOING_OFFLINE. 63062306a36Sopenharmony_ci */ 63162306a36Sopenharmony_ci pfn++; 63262306a36Sopenharmony_ci else 63362306a36Sopenharmony_ci break; 63462306a36Sopenharmony_ci } 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci return pfn; 63762306a36Sopenharmony_ci} 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci/** 64062306a36Sopenharmony_ci * test_pages_isolated - check if pageblocks in range are isolated 64162306a36Sopenharmony_ci * @start_pfn: The first PFN of the isolated range 64262306a36Sopenharmony_ci * @end_pfn: The first PFN *after* the isolated range 64362306a36Sopenharmony_ci * @isol_flags: Testing mode flags 64462306a36Sopenharmony_ci * 64562306a36Sopenharmony_ci * This tests if all in the specified range are free. 64662306a36Sopenharmony_ci * 64762306a36Sopenharmony_ci * If %MEMORY_OFFLINE is specified in @flags, it will consider 64862306a36Sopenharmony_ci * poisoned and offlined pages free as well. 64962306a36Sopenharmony_ci * 65062306a36Sopenharmony_ci * Caller must ensure the requested range doesn't span zones. 65162306a36Sopenharmony_ci * 65262306a36Sopenharmony_ci * Returns 0 if true, -EBUSY if one or more pages are in use. 65362306a36Sopenharmony_ci */ 65462306a36Sopenharmony_ciint test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, 65562306a36Sopenharmony_ci int isol_flags) 65662306a36Sopenharmony_ci{ 65762306a36Sopenharmony_ci unsigned long pfn, flags; 65862306a36Sopenharmony_ci struct page *page; 65962306a36Sopenharmony_ci struct zone *zone; 66062306a36Sopenharmony_ci int ret; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci /* 66362306a36Sopenharmony_ci * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages 66462306a36Sopenharmony_ci * are not aligned to pageblock_nr_pages. 66562306a36Sopenharmony_ci * Then we just check migratetype first. 66662306a36Sopenharmony_ci */ 66762306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 66862306a36Sopenharmony_ci page = __first_valid_page(pfn, pageblock_nr_pages); 66962306a36Sopenharmony_ci if (page && !is_migrate_isolate_page(page)) 67062306a36Sopenharmony_ci break; 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci page = __first_valid_page(start_pfn, end_pfn - start_pfn); 67362306a36Sopenharmony_ci if ((pfn < end_pfn) || !page) { 67462306a36Sopenharmony_ci ret = -EBUSY; 67562306a36Sopenharmony_ci goto out; 67662306a36Sopenharmony_ci } 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci /* Check all pages are free or marked as ISOLATED */ 67962306a36Sopenharmony_ci zone = page_zone(page); 68062306a36Sopenharmony_ci spin_lock_irqsave(&zone->lock, flags); 68162306a36Sopenharmony_ci pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags); 68262306a36Sopenharmony_ci spin_unlock_irqrestore(&zone->lock, flags); 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci ret = pfn < end_pfn ? -EBUSY : 0; 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ciout: 68762306a36Sopenharmony_ci trace_test_pages_isolated(start_pfn, end_pfn, pfn); 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci return ret; 69062306a36Sopenharmony_ci} 691