// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/cpu.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/sched/signal.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/page_owner.h>
#include <linux/psi.h>
#include "internal.h"

#ifdef CONFIG_COMPACTION
/*
 * Fragmentation score check interval for proactive compaction purposes.
 */
#define HPAGE_FRAG_CHECK_INTERVAL_MSEC	(500)

static inline void count_compact_event(enum vm_event_item item)
{
	count_vm_event(item);
}

static inline void count_compact_events(enum vm_event_item item, long delta)
{
	count_vm_events(item, delta);
}
#else
#define count_compact_event(item) do { } while (0)
#define count_compact_events(item, delta) do { } while (0)
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>

#define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
#define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))

/*
 * Page order with-respect-to which proactive compaction
 * calculates external fragmentation, which is used as
 * the "fragmentation score" of a node/zone.
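 * For example, with 4KiB base pages and 2MiB PMD-sized THPs, the order used
 * (COMPACTION_HPAGE_ORDER below) is 9, i.e. 512 pages.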
 */
#if defined CONFIG_TRANSPARENT_HUGEPAGE
#define COMPACTION_HPAGE_ORDER	HPAGE_PMD_ORDER
#elif defined CONFIG_HUGETLBFS
#define COMPACTION_HPAGE_ORDER	HUGETLB_PAGE_ORDER
#else
#define COMPACTION_HPAGE_ORDER	(PMD_SHIFT - PAGE_SHIFT)
#endif

static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long high_pfn = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		unsigned long pfn = page_to_pfn(page);
		list_del(&page->lru);
		__free_page(page);
		if (pfn > high_pfn)
			high_pfn = pfn;
	}

	return high_pfn;
}

static void split_map_pages(struct list_head *list)
{
	unsigned int i, order, nr_pages;
	struct page *page, *next;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);

		order = page_private(page);
		nr_pages = 1 << order;

		post_alloc_hook(page, order, __GFP_MOVABLE);
		if (order)
			split_page(page, order);

		for (i = 0; i < nr_pages; i++) {
			list_add(&page->lru, &tmp_list);
			page++;
		}
	}

	list_splice(&tmp_list, list);
}

#ifdef CONFIG_COMPACTION
bool PageMovable(struct page *page)
{
	const struct movable_operations *mops;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (!__PageMovable(page))
		return false;

	mops = page_movable_ops(page);
	if (mops)
		return true;

	return false;
}

void __SetPageMovable(struct page *page, const struct movable_operations *mops)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page);
	page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__SetPageMovable);

void __ClearPageMovable(struct page *page)
{
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	/*
	 * This page still has the type of a movable page, but it's
	 * actually not movable any more.
	 */
	page->mapping = (void *)PAGE_MAPPING_MOVABLE;
}
EXPORT_SYMBOL(__ClearPageMovable);

/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6

/*
 * Compaction is deferred when compaction fails to result in a page
 * allocation success. The next 1 << compact_defer_shift compaction attempts
 * are skipped, up to a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
 */
static void defer_compaction(struct zone *zone, int order)
{
	zone->compact_considered = 0;
	zone->compact_defer_shift++;

	if (order < zone->compact_order_failed)
		zone->compact_order_failed = order;

	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;

	trace_mm_compaction_defer_compaction(zone, order);
}

/* Returns true if compaction should be skipped this time */
static bool compaction_deferred(struct zone *zone, int order)
{
	unsigned long defer_limit = 1UL << zone->compact_defer_shift;

	if (order < zone->compact_order_failed)
		return false;

	/* Avoid possible overflow */
	if (++zone->compact_considered >= defer_limit) {
		zone->compact_considered = defer_limit;
		return false;
	}

	trace_mm_compaction_deferred(zone, order);

	return true;
}

/*
 * Update defer tracking counters after successful compaction of given order,
 * which means an allocation either succeeded (alloc_success == true) or is
 * expected to succeed.
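 * Clearing compact_considered and compact_defer_shift makes a later failure
 * back off from the smallest interval again, and raising compact_order_failed
 * above the given order stops compaction_deferred() from skipping requests
 * at that order.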
 */
void compaction_defer_reset(struct zone *zone, int order,
		bool alloc_success)
{
	if (alloc_success) {
		zone->compact_considered = 0;
		zone->compact_defer_shift = 0;
	}
	if (order >= zone->compact_order_failed)
		zone->compact_order_failed = order + 1;

	trace_mm_compaction_defer_reset(zone, order);
}

/* Returns true if restarting compaction after many failures */
static bool compaction_restarting(struct zone *zone, int order)
{
	if (order < zone->compact_order_failed)
		return false;

	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
		zone->compact_considered >= 1UL << zone->compact_defer_shift;
}

/* Returns true if the pageblock should be scanned for pages to isolate. */
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	if (cc->ignore_skip_hint)
		return true;

	return !get_pageblock_skip(page);
}

static void reset_cached_positions(struct zone *zone)
{
	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
	zone->compact_cached_free_pfn =
				pageblock_start_pfn(zone_end_pfn(zone) - 1);
}

#ifdef CONFIG_SPARSEMEM
/*
 * If the PFN falls into an offline section, return the start PFN of the
 * next online section. If the PFN falls into an online section or if
 * there is no next online section, return 0.
 */
static unsigned long skip_offline_sections(unsigned long start_pfn)
{
	unsigned long start_nr = pfn_to_section_nr(start_pfn);

	if (online_section_nr(start_nr))
		return 0;

	while (++start_nr <= __highest_present_section_nr) {
		if (online_section_nr(start_nr))
			return section_nr_to_pfn(start_nr);
	}

	return 0;
}

/*
 * If the PFN falls into an offline section, return the end PFN of the
 * next online section in reverse. If the PFN falls into an online section
 * or if there is no next online section in reverse, return 0.
 */
static unsigned long skip_offline_sections_reverse(unsigned long start_pfn)
{
	unsigned long start_nr = pfn_to_section_nr(start_pfn);

	if (!start_nr || online_section_nr(start_nr))
		return 0;

	while (start_nr-- > 0) {
		if (online_section_nr(start_nr))
			return section_nr_to_pfn(start_nr) + PAGES_PER_SECTION;
	}

	return 0;
}
#else
static unsigned long skip_offline_sections(unsigned long start_pfn)
{
	return 0;
}

static unsigned long skip_offline_sections_reverse(unsigned long start_pfn)
{
	return 0;
}
#endif

/*
 * Compound pages of >= pageblock_order should consistently be skipped until
 * released. It is always pointless to compact pages of such order (if they are
 * migratable), and the pageblocks they occupy cannot contain any free pages.
 */
static bool pageblock_skip_persistent(struct page *page)
{
	if (!PageCompound(page))
		return false;

	page = compound_head(page);

	if (compound_order(page) >= pageblock_order)
		return true;

	return false;
}

static bool
__reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
							bool check_target)
{
	struct page *page = pfn_to_online_page(pfn);
	struct page *block_page;
	struct page *end_page;
	unsigned long block_pfn;

	if (!page)
		return false;
	if (zone != page_zone(page))
		return false;
	if (pageblock_skip_persistent(page))
		return false;

	/*
	 * If skip is already cleared do no further checking once the
	 * restart points have been set.
	 */
	if (check_source && check_target && !get_pageblock_skip(page))
		return true;

	/*
	 * If clearing skip for the target scanner, do not select a
	 * non-movable pageblock as the starting point.
	 */
	if (!check_source && check_target &&
	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
		return false;

	/* Ensure the start of the pageblock or zone is online and valid */
	block_pfn = pageblock_start_pfn(pfn);
	block_pfn = max(block_pfn, zone->zone_start_pfn);
	block_page = pfn_to_online_page(block_pfn);
	if (block_page) {
		page = block_page;
		pfn = block_pfn;
	}

	/* Ensure the end of the pageblock or zone is online and valid */
	block_pfn = pageblock_end_pfn(pfn) - 1;
	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
	end_page = pfn_to_online_page(block_pfn);
	if (!end_page)
		return false;

	/*
	 * Only clear the hint if a sample indicates there is either a
	 * free page or an LRU page in the block. One or other condition
	 * is necessary for the block to be a migration source/target.
	 */
	do {
		if (check_source && PageLRU(page)) {
			clear_pageblock_skip(page);
			return true;
		}

		if (check_target && PageBuddy(page)) {
			clear_pageblock_skip(page);
			return true;
		}

		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
	} while (page <= end_page);

	return false;
}

/*
 * This function is called to clear all cached information on pageblocks that
 * should be skipped for page isolation when the migrate and free page scanner
 * meet.
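 * The zone is walked from both ends towards the middle; the first pageblocks
 * found suitable as a migration source and as a free target become the new
 * cached restart positions for the respective scanners.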
 */
static void __reset_isolation_suitable(struct zone *zone)
{
	unsigned long migrate_pfn = zone->zone_start_pfn;
	unsigned long free_pfn = zone_end_pfn(zone) - 1;
	unsigned long reset_migrate = free_pfn;
	unsigned long reset_free = migrate_pfn;
	bool source_set = false;
	bool free_set = false;

	if (!zone->compact_blockskip_flush)
		return;

	zone->compact_blockskip_flush = false;

	/*
	 * Walk the zone and update pageblock skip information. Source looks
	 * for PageLRU while target looks for PageBuddy. Once a scanner's
	 * restart point has been found, both PageBuddy and PageLRU are
	 * checked as the pageblock is suitable as both source and target.
	 */
	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
					free_pfn -= pageblock_nr_pages) {
		cond_resched();

		/* Update the migrate PFN */
		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
		    migrate_pfn < reset_migrate) {
			source_set = true;
			reset_migrate = migrate_pfn;
			zone->compact_init_migrate_pfn = reset_migrate;
			zone->compact_cached_migrate_pfn[0] = reset_migrate;
			zone->compact_cached_migrate_pfn[1] = reset_migrate;
		}

		/* Update the free PFN */
		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
		    free_pfn > reset_free) {
			free_set = true;
			reset_free = free_pfn;
			zone->compact_init_free_pfn = reset_free;
			zone->compact_cached_free_pfn = reset_free;
		}
	}

	/* Leave no distance if no suitable block was reset */
	if (reset_migrate >= reset_free) {
		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
		zone->compact_cached_free_pfn = free_pfn;
	}
}

void reset_isolation_suitable(pg_data_t *pgdat)
{
	int zoneid;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct zone *zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		/* Only flush if a full compaction finished recently */
		if (zone->compact_blockskip_flush)
			__reset_isolation_suitable(zone);
	}
}

/*
 * Sets the pageblock skip bit if it was clear. Note that this is a hint as
 * locks are not required for readers/writers. Returns true if it was already
 * set.
 */
static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
	bool skip;

	/* Do not update if skip hint is being ignored */
	if (cc->ignore_skip_hint)
		return false;

	skip = get_pageblock_skip(page);
	if (!skip && !cc->no_set_skip_hint)
		set_pageblock_skip(page);

	return skip;
}

static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
	struct zone *zone = cc->zone;

	/* Set for isolation rather than compaction */
	if (cc->no_set_skip_hint)
		return;

	pfn = pageblock_end_pfn(pfn);

	/* Update where async and sync compaction should restart */
	if (pfn > zone->compact_cached_migrate_pfn[0])
		zone->compact_cached_migrate_pfn[0] = pfn;
	if (cc->mode != MIGRATE_ASYNC &&
	    pfn > zone->compact_cached_migrate_pfn[1])
		zone->compact_cached_migrate_pfn[1] = pfn;
}

/*
 * If no pages were isolated then mark this pageblock to be skipped in the
 * future. The information is later cleared by __reset_isolation_suitable().
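 * The cached free-scanner position is also pulled back to this block so a
 * later free page scan can resume from here.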
 */
static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long pfn)
{
	struct zone *zone = cc->zone;

	if (cc->no_set_skip_hint)
		return;

	set_pageblock_skip(page);

	if (pfn < zone->compact_cached_free_pfn)
		zone->compact_cached_free_pfn = pfn;
}
#else
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	return true;
}

static inline bool pageblock_skip_persistent(struct page *page)
{
	return false;
}

static inline void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long pfn)
{
}

static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
}

static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
	return false;
}
#endif /* CONFIG_COMPACTION */

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. For async compaction, trylock and record if the
 * lock is contended. The lock will still be acquired but compaction will
 * abort when the current block is finished regardless of success rate.
 * Sync compaction acquires the lock.
 *
 * Always returns true which makes it easier to track lock state in callers.
 */
static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
						struct compact_control *cc)
	__acquires(lock)
{
	/* Track if the lock is contended in async mode */
	if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
		if (spin_trylock_irqsave(lock, *flags))
			return true;

		cc->contended = true;
	}

	spin_lock_irqsave(lock, *flags);
	return true;
}

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. The lock should be periodically unlocked to avoid
 * having disabled IRQs for a long time, even when there is nobody waiting on
 * the lock. It might also be that allowing the IRQs will result in
 * need_resched() becoming true. If scheduling is needed, compaction schedules.
 * Either compaction type will also abort if a fatal signal is pending.
 * In either case if the lock was locked, it is dropped and not regained.
 *
 * Returns true if compaction should abort due to fatal signal pending.
 * Returns false when compaction can continue.
 */
static bool compact_unlock_should_abort(spinlock_t *lock,
		unsigned long flags, bool *locked, struct compact_control *cc)
{
	if (*locked) {
		spin_unlock_irqrestore(lock, flags);
		*locked = false;
	}

	if (fatal_signal_pending(current)) {
		cc->contended = true;
		return true;
	}

	cond_resched();

	return false;
}

/*
 * Isolate free pages onto a private freelist. If @strict is true, will abort
 * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
 * (even though it may still end up isolating some pages).
 */
static unsigned long isolate_freepages_block(struct compact_control *cc,
				unsigned long *start_pfn,
				unsigned long end_pfn,
				struct list_head *freelist,
				unsigned int stride,
				bool strict)
{
	int nr_scanned = 0, total_isolated = 0;
	struct page *page;
	unsigned long flags = 0;
	bool locked = false;
	unsigned long blockpfn = *start_pfn;
	unsigned int order;

	/* Strict mode is for isolation, speed is secondary */
	if (strict)
		stride = 1;

	page = pfn_to_page(blockpfn);

	/* Isolate free pages. */
	for (; blockpfn < end_pfn; blockpfn += stride, page += stride) {
		int isolated;

		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give chance to IRQs. Abort if fatal signal
		 * pending.
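		 * The check is made whenever blockpfn is a multiple of
		 * COMPACT_CLUSTER_MAX.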
		 */
		if (!(blockpfn % COMPACT_CLUSTER_MAX)
		    && compact_unlock_should_abort(&cc->zone->lock, flags,
								&locked, cc))
			break;

		nr_scanned++;

		/*
		 * For compound pages such as THP and hugetlbfs, we can save
		 * potentially a lot of iterations if we skip them at once.
		 * The check is racy, but we can consider only valid values
		 * and the only danger is skipping too much.
		 */
		if (PageCompound(page)) {
			const unsigned int order = compound_order(page);

			if (likely(order <= MAX_ORDER)) {
				blockpfn += (1UL << order) - 1;
				page += (1UL << order) - 1;
				nr_scanned += (1UL << order) - 1;
			}
			goto isolate_fail;
		}

		if (!PageBuddy(page))
			goto isolate_fail;

		/* If we already hold the lock, we can skip some rechecking. */
		if (!locked) {
			locked = compact_lock_irqsave(&cc->zone->lock,
								&flags, cc);

			/* Recheck this is a buddy page under lock */
			if (!PageBuddy(page))
				goto isolate_fail;
		}

		/* Found a free page, will break it into order-0 pages */
		order = buddy_order(page);
		isolated = __isolate_free_page(page, order);
		if (!isolated)
			break;
		set_page_private(page, order);

		nr_scanned += isolated - 1;
		total_isolated += isolated;
		cc->nr_freepages += isolated;
		list_add_tail(&page->lru, freelist);

		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
			blockpfn += isolated;
			break;
		}
		/* Advance to the end of split page */
		blockpfn += isolated - 1;
		page += isolated - 1;
		continue;

isolate_fail:
		if (strict)
			break;

	}

	if (locked)
		spin_unlock_irqrestore(&cc->zone->lock, flags);

	/*
	 * There is a tiny chance that we have read bogus compound_order(),
	 * so be careful to not go outside of the pageblock.
	 */
	if (unlikely(blockpfn > end_pfn))
		blockpfn = end_pfn;

	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
					nr_scanned, total_isolated);

	/* Record how far we have got within the block */
	*start_pfn = blockpfn;

	/*
	 * If strict isolation is requested by CMA then check that all the
	 * pages requested were isolated. If there were any failures, 0 is
	 * returned and CMA will fail.
	 */
	if (strict && blockpfn < end_pfn)
		total_isolated = 0;

	cc->total_free_scanned += nr_scanned;
	if (total_isolated)
		count_compact_events(COMPACTISOLATED, total_isolated);
	return total_isolated;
}

/**
 * isolate_freepages_range() - isolate free pages.
 * @cc:        Compaction control structure.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn:   The one-past-last PFN.
 *
 * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors and cause the function
 * to undo its actions and return zero.
 *
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * page (which may be greater than end_pfn if the end fell in the middle of
 * a free page).
 */
unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
	LIST_HEAD(freelist);

	pfn = start_pfn;
	block_start_pfn = pageblock_start_pfn(pfn);
	if (block_start_pfn < cc->zone->zone_start_pfn)
		block_start_pfn = cc->zone->zone_start_pfn;
	block_end_pfn = pageblock_end_pfn(pfn);

	for (; pfn < end_pfn; pfn += isolated,
				block_start_pfn = block_end_pfn,
				block_end_pfn += pageblock_nr_pages) {
		/* Protect pfn from changing by isolate_freepages_block */
		unsigned long isolate_start_pfn = pfn;

		/*
		 * pfn could pass block_end_pfn if the isolated free page
		 * is larger than pageblock order. In this case, adjust the
		 * scanning range to the correct pageblock.
		 */
		if (pfn >= block_end_pfn) {
			block_start_pfn = pageblock_start_pfn(pfn);
			block_end_pfn = pageblock_end_pfn(pfn);
		}

		block_end_pfn = min(block_end_pfn, end_pfn);

		if (!pageblock_pfn_to_page(block_start_pfn,
					block_end_pfn, cc->zone))
			break;

		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
					block_end_pfn, &freelist, 0, true);

		/*
		 * In strict mode, isolate_freepages_block() returns 0 if
		 * there are any holes in the block (ie. invalid PFNs or
		 * non-free pages).
		 */
		if (!isolated)
			break;

		/*
		 * If we managed to isolate pages, it is always (1 << n) *
		 * pageblock_nr_pages for some non-negative n. (Max order
		 * page may span two pageblocks).
		 */
	}

	/* __isolate_free_page() does not map the pages */
	split_map_pages(&freelist);

	if (pfn < end_pfn) {
		/* Loop terminated early, cleanup. */
		release_freepages(&freelist);
		return 0;
	}

	/* We don't use freelists for anything. */
	return pfn;
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct compact_control *cc)
{
	pg_data_t *pgdat = cc->zone->zone_pgdat;
	bool too_many;

	unsigned long active, inactive, isolated;

	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
			node_page_state(pgdat, NR_INACTIVE_ANON);
	active = node_page_state(pgdat, NR_ACTIVE_FILE) +
			node_page_state(pgdat, NR_ACTIVE_ANON);
	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
			node_page_state(pgdat, NR_ISOLATED_ANON);

	/*
	 * Allow GFP_NOFS to isolate past the limit set for regular
	 * compaction runs. This prevents an ABBA deadlock when other
	 * compactors have already isolated to the limit, but are
	 * blocked on filesystem locks held by the GFP_NOFS thread.
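	 * Callers that may enter the filesystem (__GFP_FS set) get the
	 * stricter limit by having the inactive/active counts scaled down
	 * below.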
	 */
	if (cc->gfp_mask & __GFP_FS) {
		inactive >>= 3;
		active >>= 3;
	}

	too_many = isolated > (inactive + active) / 2;
	if (!too_many)
		wake_throttle_isolated(pgdat);

	return too_many;
}

/**
 * isolate_migratepages_block() - isolate all migrate-able pages within
 *				  a single pageblock
 * @cc:      Compaction control structure.
 * @low_pfn: The first PFN to isolate
 * @end_pfn: The one-past-the-last PFN to isolate, within the same pageblock
 * @mode:    Isolation mode to be used.
 *
 * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn). The range is expected to be within the same pageblock.
 * Returns errno, like -EAGAIN or -EINTR in case of e.g. a pending signal or
 * congestion, -ENOMEM in case we could not allocate a page, or 0.
 * cc->migrate_pfn will contain the next pfn to scan.
 *
 * The pages are isolated on cc->migratepages list (not required to be empty),
 * and cc->nr_migratepages is updated accordingly.
 */
static int
isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
			unsigned long end_pfn, isolate_mode_t mode)
{
	pg_data_t *pgdat = cc->zone->zone_pgdat;
	unsigned long nr_scanned = 0, nr_isolated = 0;
	struct lruvec *lruvec;
	unsigned long flags = 0;
	struct lruvec *locked = NULL;
	struct folio *folio = NULL;
	struct page *page = NULL, *valid_page = NULL;
	struct address_space *mapping;
	unsigned long start_pfn = low_pfn;
	bool skip_on_failure = false;
	unsigned long next_skip_pfn = 0;
	bool skip_updated = false;
	int ret = 0;

	cc->migrate_pfn = low_pfn;

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated
	 */
	while (unlikely(too_many_isolated(cc))) {
		/* stop isolation if there are still pages not migrated */
		if (cc->nr_migratepages)
			return -EAGAIN;

		/* async migration should just abort */
		if (cc->mode == MIGRATE_ASYNC)
			return -EAGAIN;

		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);

		if (fatal_signal_pending(current))
			return -EINTR;
	}

	cond_resched();

	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
		skip_on_failure = true;
		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
	}

	/* Time to isolate some pages for migration */
	for (; low_pfn < end_pfn; low_pfn++) {

		if (skip_on_failure && low_pfn >= next_skip_pfn) {
			/*
			 * We have isolated all migration candidates in the
			 * previous order-aligned block, and did not skip it due
			 * to failure. We should migrate the pages now and
			 * hopefully succeed compaction.
			 */
			if (nr_isolated)
				break;

			/*
			 * We failed to isolate in the previous order-aligned
			 * block. Set the new boundary to the end of the
			 * current block. Note we can't simply increase
			 * next_skip_pfn by 1 << order, as low_pfn might have
			 * been incremented by a higher number due to skipping
			 * a compound or a high-order buddy page in the
			 * previous loop iteration.
			 */
			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
		}

		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give chance to IRQs. Abort completely if
		 * a fatal signal is pending.
		 */
		if (!(low_pfn % COMPACT_CLUSTER_MAX)) {
			if (locked) {
				unlock_page_lruvec_irqrestore(locked, flags);
				locked = NULL;
			}

			if (fatal_signal_pending(current)) {
				cc->contended = true;
				ret = -EINTR;

				goto fatal_pending;
			}

			cond_resched();
		}

		nr_scanned++;

		page = pfn_to_page(low_pfn);

		/*
		 * Check if the pageblock has already been marked skipped.
		 * Only the first PFN is checked as the caller isolates
		 * COMPACT_CLUSTER_MAX at a time so the second call must
		 * not falsely conclude that the block should be skipped.
		 */
		if (!valid_page && (pageblock_aligned(low_pfn) ||
				    low_pfn == cc->zone->zone_start_pfn)) {
			if (!isolation_suitable(cc, page)) {
				low_pfn = end_pfn;
				folio = NULL;
				goto isolate_abort;
			}
			valid_page = page;
		}

		if (PageHuge(page) && cc->alloc_contig) {
			if (locked) {
				unlock_page_lruvec_irqrestore(locked, flags);
				locked = NULL;
			}

			ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);

			/*
			 * Fail isolation in case isolate_or_dissolve_huge_page()
			 * reports an error. In case of -ENOMEM, abort right away.
			 */
			if (ret < 0) {
				/* Do not report -EBUSY down the chain */
				if (ret == -EBUSY)
					ret = 0;
				low_pfn += compound_nr(page) - 1;
				nr_scanned += compound_nr(page) - 1;
				goto isolate_fail;
			}

			if (PageHuge(page)) {
				/*
				 * Hugepage was successfully isolated and placed
				 * on the cc->migratepages list.
				 */
				folio = page_folio(page);
				low_pfn += folio_nr_pages(folio) - 1;
				goto isolate_success_no_list;
			}

			/*
			 * Ok, the hugepage was dissolved. Now these pages are
			 * Buddy and cannot be re-allocated because they are
			 * isolated. Fall-through as the check below handles
			 * Buddy pages.
			 */
		}

		/*
		 * Skip if free. We read page order here without zone lock
		 * which is generally unsafe, but the race window is small and
		 * the worst thing that can happen is that we skip some
		 * potential isolation targets.
		 */
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			/*
			 * Without lock, we cannot be sure that what we got is
			 * a valid page order. Consider only values in the
			 * valid order range to prevent low_pfn overflow.
			 */
			if (freepage_order > 0 && freepage_order <= MAX_ORDER) {
				low_pfn += (1UL << freepage_order) - 1;
				nr_scanned += (1UL << freepage_order) - 1;
			}
			continue;
		}

		/*
		 * Regardless of being on LRU, compound pages such as THP and
		 * hugetlbfs are not to be compacted unless we are attempting
		 * an allocation much larger than the huge page size (eg CMA).
		 * We can potentially save a lot of iterations if we skip them
		 * at once. The check is racy, but we can consider only valid
		 * values and the only danger is skipping too much.
		 */
		if (PageCompound(page) && !cc->alloc_contig) {
			const unsigned int order = compound_order(page);

			if (likely(order <= MAX_ORDER)) {
				low_pfn += (1UL << order) - 1;
				nr_scanned += (1UL << order) - 1;
			}
			goto isolate_fail;
		}

		/*
		 * Check may be lockless but that's ok as we recheck later.
		 * It's possible to migrate LRU and non-lru movable pages.
		 * Skip any other type of page
		 */
		if (!PageLRU(page)) {
			/*
			 * __PageMovable can return false positive so we need
			 * to verify it under page_lock.
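			 * isolate_movable_page() re-checks the movable state
			 * under the page lock before isolating.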
			 */
			if (unlikely(__PageMovable(page)) &&
					!PageIsolated(page)) {
				if (locked) {
					unlock_page_lruvec_irqrestore(locked, flags);
					locked = NULL;
				}

				if (isolate_movable_page(page, mode)) {
					folio = page_folio(page);
					goto isolate_success;
				}
			}

			goto isolate_fail;
		}

		/*
		 * Be careful not to clear PageLRU until after we're
		 * sure the page is not being freed elsewhere -- the
		 * page release code relies on it.
		 */
		folio = folio_get_nontail_page(page);
		if (unlikely(!folio))
			goto isolate_fail;

		/*
		 * Migration will fail if an anonymous page is pinned in memory,
		 * so avoid taking lru_lock and isolating it unnecessarily in an
		 * admittedly racy check.
		 */
		mapping = folio_mapping(folio);
		if (!mapping && (folio_ref_count(folio) - 1) > folio_mapcount(folio))
			goto isolate_fail_put;

		/*
		 * Only allow to migrate anonymous pages in GFP_NOFS context
		 * because those do not depend on fs locks.
		 */
		if (!(cc->gfp_mask & __GFP_FS) && mapping)
			goto isolate_fail_put;

		/* Only take pages on LRU: a check now makes later tests safe */
		if (!folio_test_lru(folio))
			goto isolate_fail_put;

		/* Compaction might skip unevictable pages but CMA takes them */
		if (!(mode & ISOLATE_UNEVICTABLE) && folio_test_unevictable(folio))
			goto isolate_fail_put;

		/*
		 * To minimise LRU disruption, the caller can indicate with
		 * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages
		 * it will be able to migrate without blocking - clean pages
		 * for the most part. PageWriteback would require blocking.
109262306a36Sopenharmony_ci */ 109362306a36Sopenharmony_ci if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_writeback(folio)) 109462306a36Sopenharmony_ci goto isolate_fail_put; 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_dirty(folio)) { 109762306a36Sopenharmony_ci bool migrate_dirty; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci /* 110062306a36Sopenharmony_ci * Only folios without mappings or that have 110162306a36Sopenharmony_ci * a ->migrate_folio callback are possible to 110262306a36Sopenharmony_ci * migrate without blocking. However, we may 110362306a36Sopenharmony_ci * be racing with truncation, which can free 110462306a36Sopenharmony_ci * the mapping. Truncation holds the folio lock 110562306a36Sopenharmony_ci * until after the folio is removed from the page 110662306a36Sopenharmony_ci * cache so holding it ourselves is sufficient. 110762306a36Sopenharmony_ci */ 110862306a36Sopenharmony_ci if (!folio_trylock(folio)) 110962306a36Sopenharmony_ci goto isolate_fail_put; 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci mapping = folio_mapping(folio); 111262306a36Sopenharmony_ci migrate_dirty = !mapping || 111362306a36Sopenharmony_ci mapping->a_ops->migrate_folio; 111462306a36Sopenharmony_ci folio_unlock(folio); 111562306a36Sopenharmony_ci if (!migrate_dirty) 111662306a36Sopenharmony_ci goto isolate_fail_put; 111762306a36Sopenharmony_ci } 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci /* Try isolate the folio */ 112062306a36Sopenharmony_ci if (!folio_test_clear_lru(folio)) 112162306a36Sopenharmony_ci goto isolate_fail_put; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci lruvec = folio_lruvec(folio); 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci /* If we already hold the lock, we can skip some rechecking */ 112662306a36Sopenharmony_ci if (lruvec != locked) { 112762306a36Sopenharmony_ci if (locked) 112862306a36Sopenharmony_ci unlock_page_lruvec_irqrestore(locked, flags); 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci compact_lock_irqsave(&lruvec->lru_lock, &flags, cc); 113162306a36Sopenharmony_ci locked = lruvec; 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci lruvec_memcg_debug(lruvec, folio); 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci /* 113662306a36Sopenharmony_ci * Try get exclusive access under lock. If marked for 113762306a36Sopenharmony_ci * skip, the scan is aborted unless the current context 113862306a36Sopenharmony_ci * is a rescan to reach the end of the pageblock. 113962306a36Sopenharmony_ci */ 114062306a36Sopenharmony_ci if (!skip_updated && valid_page) { 114162306a36Sopenharmony_ci skip_updated = true; 114262306a36Sopenharmony_ci if (test_and_set_skip(cc, valid_page) && 114362306a36Sopenharmony_ci !cc->finish_pageblock) { 114462306a36Sopenharmony_ci low_pfn = end_pfn; 114562306a36Sopenharmony_ci goto isolate_abort; 114662306a36Sopenharmony_ci } 114762306a36Sopenharmony_ci } 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci /* 115062306a36Sopenharmony_ci * folio become large since the non-locked check, 115162306a36Sopenharmony_ci * and it's on LRU. 
115262306a36Sopenharmony_ci */ 115362306a36Sopenharmony_ci if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) { 115462306a36Sopenharmony_ci low_pfn += folio_nr_pages(folio) - 1; 115562306a36Sopenharmony_ci nr_scanned += folio_nr_pages(folio) - 1; 115662306a36Sopenharmony_ci folio_set_lru(folio); 115762306a36Sopenharmony_ci goto isolate_fail_put; 115862306a36Sopenharmony_ci } 115962306a36Sopenharmony_ci } 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci /* The folio is taken off the LRU */ 116262306a36Sopenharmony_ci if (folio_test_large(folio)) 116362306a36Sopenharmony_ci low_pfn += folio_nr_pages(folio) - 1; 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci /* Successfully isolated */ 116662306a36Sopenharmony_ci lruvec_del_folio(lruvec, folio); 116762306a36Sopenharmony_ci node_stat_mod_folio(folio, 116862306a36Sopenharmony_ci NR_ISOLATED_ANON + folio_is_file_lru(folio), 116962306a36Sopenharmony_ci folio_nr_pages(folio)); 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ciisolate_success: 117262306a36Sopenharmony_ci list_add(&folio->lru, &cc->migratepages); 117362306a36Sopenharmony_ciisolate_success_no_list: 117462306a36Sopenharmony_ci cc->nr_migratepages += folio_nr_pages(folio); 117562306a36Sopenharmony_ci nr_isolated += folio_nr_pages(folio); 117662306a36Sopenharmony_ci nr_scanned += folio_nr_pages(folio) - 1; 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci /* 117962306a36Sopenharmony_ci * Avoid isolating too much unless this block is being 118062306a36Sopenharmony_ci * fully scanned (e.g. dirty/writeback pages, parallel allocation) 118162306a36Sopenharmony_ci * or a lock is contended. For contention, isolate quickly to 118262306a36Sopenharmony_ci * potentially remove one source of contention. 118362306a36Sopenharmony_ci */ 118462306a36Sopenharmony_ci if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX && 118562306a36Sopenharmony_ci !cc->finish_pageblock && !cc->contended) { 118662306a36Sopenharmony_ci ++low_pfn; 118762306a36Sopenharmony_ci break; 118862306a36Sopenharmony_ci } 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci continue; 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ciisolate_fail_put: 119362306a36Sopenharmony_ci /* Avoid potential deadlock in freeing page under lru_lock */ 119462306a36Sopenharmony_ci if (locked) { 119562306a36Sopenharmony_ci unlock_page_lruvec_irqrestore(locked, flags); 119662306a36Sopenharmony_ci locked = NULL; 119762306a36Sopenharmony_ci } 119862306a36Sopenharmony_ci folio_put(folio); 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ciisolate_fail: 120162306a36Sopenharmony_ci if (!skip_on_failure && ret != -ENOMEM) 120262306a36Sopenharmony_ci continue; 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci /* 120562306a36Sopenharmony_ci * We have isolated some pages, but then failed. Release them 120662306a36Sopenharmony_ci * instead of migrating, as we cannot form the cc->order buddy 120762306a36Sopenharmony_ci * page anyway. 
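 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  predicate mirroring the batch cut-off above: pause once a cluster's worth
 *  of pages is isolated, unless the pageblock must be finished or the lock
 *  was contended. DEMO_CLUSTER_MAX and the function name are hypothetical.]
 *
 *	#include <stdbool.h>
 *	#include <stdio.h>
 *
 *	#define DEMO_CLUSTER_MAX 32U   // stand-in for COMPACT_CLUSTER_MAX
 *
 *	static bool stop_after_batch(unsigned int nr_migratepages,
 *				     bool finish_pageblock, bool contended)
 *	{
 *		return nr_migratepages >= DEMO_CLUSTER_MAX &&
 *		       !finish_pageblock && !contended;
 *	}
 *
 *	int main(void)
 *	{
 *		printf("%d\n", stop_after_batch(32, false, false)); // 1: batch full, pause here
 *		printf("%d\n", stop_after_batch(32, true,  false)); // 0: keep going, finish the block
 *		printf("%d\n", stop_after_batch(16, false, false)); // 0: batch not full yet
 *		return 0;
 *	}
 *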
120862306a36Sopenharmony_ci */ 120962306a36Sopenharmony_ci if (nr_isolated) { 121062306a36Sopenharmony_ci if (locked) { 121162306a36Sopenharmony_ci unlock_page_lruvec_irqrestore(locked, flags); 121262306a36Sopenharmony_ci locked = NULL; 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci putback_movable_pages(&cc->migratepages); 121562306a36Sopenharmony_ci cc->nr_migratepages = 0; 121662306a36Sopenharmony_ci nr_isolated = 0; 121762306a36Sopenharmony_ci } 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci if (low_pfn < next_skip_pfn) { 122062306a36Sopenharmony_ci low_pfn = next_skip_pfn - 1; 122162306a36Sopenharmony_ci /* 122262306a36Sopenharmony_ci * The check near the loop beginning would have updated 122362306a36Sopenharmony_ci * next_skip_pfn too, but this is a bit simpler. 122462306a36Sopenharmony_ci */ 122562306a36Sopenharmony_ci next_skip_pfn += 1UL << cc->order; 122662306a36Sopenharmony_ci } 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci if (ret == -ENOMEM) 122962306a36Sopenharmony_ci break; 123062306a36Sopenharmony_ci } 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci /* 123362306a36Sopenharmony_ci * The PageBuddy() check could have potentially brought us outside 123462306a36Sopenharmony_ci * the range to be scanned. 123562306a36Sopenharmony_ci */ 123662306a36Sopenharmony_ci if (unlikely(low_pfn > end_pfn)) 123762306a36Sopenharmony_ci low_pfn = end_pfn; 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_ci folio = NULL; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ciisolate_abort: 124262306a36Sopenharmony_ci if (locked) 124362306a36Sopenharmony_ci unlock_page_lruvec_irqrestore(locked, flags); 124462306a36Sopenharmony_ci if (folio) { 124562306a36Sopenharmony_ci folio_set_lru(folio); 124662306a36Sopenharmony_ci folio_put(folio); 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci /* 125062306a36Sopenharmony_ci * Update the cached scanner pfn once the pageblock has been scanned. 125162306a36Sopenharmony_ci * Pages will either be migrated in which case there is no point 125262306a36Sopenharmony_ci * scanning in the near future or migration failed in which case the 125362306a36Sopenharmony_ci * failure reason may persist. The block is marked for skipping if 125462306a36Sopenharmony_ci * there were no pages isolated in the block or if the block is 125562306a36Sopenharmony_ci * rescanned twice in a row. 
125662306a36Sopenharmony_ci	 */
125762306a36Sopenharmony_ci	if (low_pfn == end_pfn && (!nr_isolated || cc->finish_pageblock)) {
125862306a36Sopenharmony_ci		if (!cc->no_set_skip_hint && valid_page && !skip_updated)
125962306a36Sopenharmony_ci			set_pageblock_skip(valid_page);
126062306a36Sopenharmony_ci		update_cached_migrate(cc, low_pfn);
126162306a36Sopenharmony_ci	}
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
126462306a36Sopenharmony_ci						nr_scanned, nr_isolated);
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_cifatal_pending:
126762306a36Sopenharmony_ci	cc->total_migrate_scanned += nr_scanned;
126862306a36Sopenharmony_ci	if (nr_isolated)
126962306a36Sopenharmony_ci		count_compact_events(COMPACTISOLATED, nr_isolated);
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci	cc->migrate_pfn = low_pfn;
127262306a36Sopenharmony_ci
127362306a36Sopenharmony_ci	return ret;
127462306a36Sopenharmony_ci}
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci/**
127762306a36Sopenharmony_ci * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
127862306a36Sopenharmony_ci * @cc:        Compaction control structure.
127962306a36Sopenharmony_ci * @start_pfn: The first PFN to start isolating.
128062306a36Sopenharmony_ci * @end_pfn:   The one-past-last PFN.
128162306a36Sopenharmony_ci *
128262306a36Sopenharmony_ci * Returns -EAGAIN when contended, -EINTR in case of a signal pending, -ENOMEM
128362306a36Sopenharmony_ci * in case we could not allocate a page, or 0.
128462306a36Sopenharmony_ci */
128562306a36Sopenharmony_ciint
128662306a36Sopenharmony_ciisolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
128762306a36Sopenharmony_ci							unsigned long end_pfn)
128862306a36Sopenharmony_ci{
128962306a36Sopenharmony_ci	unsigned long pfn, block_start_pfn, block_end_pfn;
129062306a36Sopenharmony_ci	int ret = 0;
129162306a36Sopenharmony_ci
129262306a36Sopenharmony_ci	/* Scan block by block.
First and last block may be incomplete */ 129362306a36Sopenharmony_ci pfn = start_pfn; 129462306a36Sopenharmony_ci block_start_pfn = pageblock_start_pfn(pfn); 129562306a36Sopenharmony_ci if (block_start_pfn < cc->zone->zone_start_pfn) 129662306a36Sopenharmony_ci block_start_pfn = cc->zone->zone_start_pfn; 129762306a36Sopenharmony_ci block_end_pfn = pageblock_end_pfn(pfn); 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci for (; pfn < end_pfn; pfn = block_end_pfn, 130062306a36Sopenharmony_ci block_start_pfn = block_end_pfn, 130162306a36Sopenharmony_ci block_end_pfn += pageblock_nr_pages) { 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci block_end_pfn = min(block_end_pfn, end_pfn); 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci if (!pageblock_pfn_to_page(block_start_pfn, 130662306a36Sopenharmony_ci block_end_pfn, cc->zone)) 130762306a36Sopenharmony_ci continue; 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci ret = isolate_migratepages_block(cc, pfn, block_end_pfn, 131062306a36Sopenharmony_ci ISOLATE_UNEVICTABLE); 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci if (ret) 131362306a36Sopenharmony_ci break; 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX) 131662306a36Sopenharmony_ci break; 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci return ret; 132062306a36Sopenharmony_ci} 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 132362306a36Sopenharmony_ci#ifdef CONFIG_COMPACTION 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_cistatic bool suitable_migration_source(struct compact_control *cc, 132662306a36Sopenharmony_ci struct page *page) 132762306a36Sopenharmony_ci{ 132862306a36Sopenharmony_ci int block_mt; 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci if (pageblock_skip_persistent(page)) 133162306a36Sopenharmony_ci return false; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction) 133462306a36Sopenharmony_ci return true; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci block_mt = get_pageblock_migratetype(page); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci if (cc->migratetype == MIGRATE_MOVABLE) 133962306a36Sopenharmony_ci return is_migrate_movable(block_mt); 134062306a36Sopenharmony_ci else 134162306a36Sopenharmony_ci return block_mt == cc->migratetype; 134262306a36Sopenharmony_ci} 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci/* Returns true if the page is within a block suitable for migration to */ 134562306a36Sopenharmony_cistatic bool suitable_migration_target(struct compact_control *cc, 134662306a36Sopenharmony_ci struct page *page) 134762306a36Sopenharmony_ci{ 134862306a36Sopenharmony_ci /* If the page is a large free page, then disallow migration */ 134962306a36Sopenharmony_ci if (PageBuddy(page)) { 135062306a36Sopenharmony_ci /* 135162306a36Sopenharmony_ci * We are checking page_order without zone->lock taken. But 135262306a36Sopenharmony_ci * the only small danger is that we skip a potentially suitable 135362306a36Sopenharmony_ci * pageblock, so it's not worth to check order for valid range. 
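 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the block-by-block stepping used by the range scanner above,
 *  including the clamped partial first and last pageblocks. DEMO_* names,
 *  the pageblock size and the sample pfns are hypothetical.]
 *
 *	#include <stdio.h>
 *
 *	#define DEMO_PAGEBLOCK_PAGES 512UL   // hypothetical pageblock size (power of two)
 *
 *	static void walk_range(unsigned long start_pfn, unsigned long end_pfn,
 *			       unsigned long zone_start_pfn)
 *	{
 *		unsigned long pfn = start_pfn;
 *		unsigned long block_start = pfn & ~(DEMO_PAGEBLOCK_PAGES - 1);
 *		unsigned long block_end = block_start + DEMO_PAGEBLOCK_PAGES;
 *
 *		if (block_start < zone_start_pfn)
 *			block_start = zone_start_pfn;
 *
 *		for (; pfn < end_pfn; pfn = block_end,
 *		     block_start = block_end, block_end += DEMO_PAGEBLOCK_PAGES) {
 *			unsigned long scan_end = block_end < end_pfn ? block_end : end_pfn;
 *			printf("block at %lu: scan [%lu, %lu)\n", block_start, pfn, scan_end);
 *		}
 *	}
 *
 *	int main(void)
 *	{
 *		walk_range(700, 1800, 512);   // partial first and last blocks, one full block
 *		return 0;
 *	}
 *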
135462306a36Sopenharmony_ci */ 135562306a36Sopenharmony_ci if (buddy_order_unsafe(page) >= pageblock_order) 135662306a36Sopenharmony_ci return false; 135762306a36Sopenharmony_ci } 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci if (cc->ignore_block_suitable) 136062306a36Sopenharmony_ci return true; 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 136362306a36Sopenharmony_ci if (is_migrate_movable(get_pageblock_migratetype(page))) 136462306a36Sopenharmony_ci return true; 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_ci /* Otherwise skip the block */ 136762306a36Sopenharmony_ci return false; 136862306a36Sopenharmony_ci} 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_cistatic inline unsigned int 137162306a36Sopenharmony_cifreelist_scan_limit(struct compact_control *cc) 137262306a36Sopenharmony_ci{ 137362306a36Sopenharmony_ci unsigned short shift = BITS_PER_LONG - 1; 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1; 137662306a36Sopenharmony_ci} 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci/* 137962306a36Sopenharmony_ci * Test whether the free scanner has reached the same or lower pageblock than 138062306a36Sopenharmony_ci * the migration scanner, and compaction should thus terminate. 138162306a36Sopenharmony_ci */ 138262306a36Sopenharmony_cistatic inline bool compact_scanners_met(struct compact_control *cc) 138362306a36Sopenharmony_ci{ 138462306a36Sopenharmony_ci return (cc->free_pfn >> pageblock_order) 138562306a36Sopenharmony_ci <= (cc->migrate_pfn >> pageblock_order); 138662306a36Sopenharmony_ci} 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci/* 138962306a36Sopenharmony_ci * Used when scanning for a suitable migration target which scans freelists 139062306a36Sopenharmony_ci * in reverse. Reorders the list such as the unscanned pages are scanned 139162306a36Sopenharmony_ci * first on the next iteration of the free scanner 139262306a36Sopenharmony_ci */ 139362306a36Sopenharmony_cistatic void 139462306a36Sopenharmony_cimove_freelist_head(struct list_head *freelist, struct page *freepage) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci LIST_HEAD(sublist); 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci if (!list_is_last(freelist, &freepage->lru)) { 139962306a36Sopenharmony_ci list_cut_before(&sublist, freelist, &freepage->lru); 140062306a36Sopenharmony_ci list_splice_tail(&sublist, freelist); 140162306a36Sopenharmony_ci } 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci/* 140562306a36Sopenharmony_ci * Similar to move_freelist_head except used by the migration scanner 140662306a36Sopenharmony_ci * when scanning forward. It's possible for these list operations to 140762306a36Sopenharmony_ci * move against each other if they search the free list exactly in 140862306a36Sopenharmony_ci * lockstep. 
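 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of how the scan limit computed by freelist_scan_limit() above
 *  shrinks exponentially with previous fast-search failures but never drops
 *  below one. DEMO_CLUSTER_MAX and the 63-bit cap are hypothetical stand-ins
 *  for COMPACT_CLUSTER_MAX and BITS_PER_LONG - 1.]
 *
 *	#include <stdio.h>
 *
 *	#define DEMO_CLUSTER_MAX 32U
 *
 *	static unsigned int scan_limit(unsigned int fast_search_fail)
 *	{
 *		unsigned int shift = fast_search_fail < 63 ? fast_search_fail : 63;
 *
 *		// Widen before shifting so a large failure count stays well defined.
 *		return (unsigned int)((unsigned long)DEMO_CLUSTER_MAX >> shift) + 1;
 *	}
 *
 *	int main(void)
 *	{
 *		for (unsigned int fail = 0; fail <= 6; fail++)
 *			printf("fail=%u limit=%u\n", fail, scan_limit(fail));
 *		// prints 33, 17, 9, 5, 3, 2, 1
 *		return 0;
 *	}
 *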
140962306a36Sopenharmony_ci */ 141062306a36Sopenharmony_cistatic void 141162306a36Sopenharmony_cimove_freelist_tail(struct list_head *freelist, struct page *freepage) 141262306a36Sopenharmony_ci{ 141362306a36Sopenharmony_ci LIST_HEAD(sublist); 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci if (!list_is_first(freelist, &freepage->lru)) { 141662306a36Sopenharmony_ci list_cut_position(&sublist, freelist, &freepage->lru); 141762306a36Sopenharmony_ci list_splice_tail(&sublist, freelist); 141862306a36Sopenharmony_ci } 141962306a36Sopenharmony_ci} 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_cistatic void 142262306a36Sopenharmony_cifast_isolate_around(struct compact_control *cc, unsigned long pfn) 142362306a36Sopenharmony_ci{ 142462306a36Sopenharmony_ci unsigned long start_pfn, end_pfn; 142562306a36Sopenharmony_ci struct page *page; 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci /* Do not search around if there are enough pages already */ 142862306a36Sopenharmony_ci if (cc->nr_freepages >= cc->nr_migratepages) 142962306a36Sopenharmony_ci return; 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci /* Minimise scanning during async compaction */ 143262306a36Sopenharmony_ci if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC) 143362306a36Sopenharmony_ci return; 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_ci /* Pageblock boundaries */ 143662306a36Sopenharmony_ci start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn); 143762306a36Sopenharmony_ci end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone); 144062306a36Sopenharmony_ci if (!page) 144162306a36Sopenharmony_ci return; 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci /* Skip this pageblock in the future as it's full or nearly full */ 144662306a36Sopenharmony_ci if (start_pfn == end_pfn && !cc->no_set_skip_hint) 144762306a36Sopenharmony_ci set_pageblock_skip(page); 144862306a36Sopenharmony_ci} 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci/* Search orders in round-robin fashion */ 145162306a36Sopenharmony_cistatic int next_search_order(struct compact_control *cc, int order) 145262306a36Sopenharmony_ci{ 145362306a36Sopenharmony_ci order--; 145462306a36Sopenharmony_ci if (order < 0) 145562306a36Sopenharmony_ci order = cc->order - 1; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci /* Search wrapped around? 
*/ 145862306a36Sopenharmony_ci if (order == cc->search_order) { 145962306a36Sopenharmony_ci cc->search_order--; 146062306a36Sopenharmony_ci if (cc->search_order < 0) 146162306a36Sopenharmony_ci cc->search_order = cc->order - 1; 146262306a36Sopenharmony_ci return -1; 146362306a36Sopenharmony_ci } 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci return order; 146662306a36Sopenharmony_ci} 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_cistatic void fast_isolate_freepages(struct compact_control *cc) 146962306a36Sopenharmony_ci{ 147062306a36Sopenharmony_ci unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1); 147162306a36Sopenharmony_ci unsigned int nr_scanned = 0, total_isolated = 0; 147262306a36Sopenharmony_ci unsigned long low_pfn, min_pfn, highest = 0; 147362306a36Sopenharmony_ci unsigned long nr_isolated = 0; 147462306a36Sopenharmony_ci unsigned long distance; 147562306a36Sopenharmony_ci struct page *page = NULL; 147662306a36Sopenharmony_ci bool scan_start = false; 147762306a36Sopenharmony_ci int order; 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci /* Full compaction passes in a negative order */ 148062306a36Sopenharmony_ci if (cc->order <= 0) 148162306a36Sopenharmony_ci return; 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci /* 148462306a36Sopenharmony_ci * If starting the scan, use a deeper search and use the highest 148562306a36Sopenharmony_ci * PFN found if a suitable one is not found. 148662306a36Sopenharmony_ci */ 148762306a36Sopenharmony_ci if (cc->free_pfn >= cc->zone->compact_init_free_pfn) { 148862306a36Sopenharmony_ci limit = pageblock_nr_pages >> 1; 148962306a36Sopenharmony_ci scan_start = true; 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci 149262306a36Sopenharmony_ci /* 149362306a36Sopenharmony_ci * Preferred point is in the top quarter of the scan space but take 149462306a36Sopenharmony_ci * a pfn from the top half if the search is problematic. 
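 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A simplified,
 *  standalone loop in the spirit of next_search_order() above: visit each
 *  order once, starting from a preferred order and wrapping downward. The
 *  real helper also updates cc->search_order; names below are hypothetical.]
 *
 *	#include <stdio.h>
 *
 *	static void round_robin_orders(int max_order, int start_order)
 *	{
 *		int order = start_order;
 *
 *		for (int visited = 0; visited < max_order; visited++) {
 *			printf("try order %d\n", order);
 *			order--;
 *			if (order < 0)
 *				order = max_order - 1;   // wrap to the highest candidate
 *		}
 *	}
 *
 *	int main(void)
 *	{
 *		round_robin_orders(5, 2);   // visits 2, 1, 0, 4, 3
 *		return 0;
 *	}
 *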
149562306a36Sopenharmony_ci */ 149662306a36Sopenharmony_ci distance = (cc->free_pfn - cc->migrate_pfn); 149762306a36Sopenharmony_ci low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2)); 149862306a36Sopenharmony_ci min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1)); 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci if (WARN_ON_ONCE(min_pfn > low_pfn)) 150162306a36Sopenharmony_ci low_pfn = min_pfn; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci /* 150462306a36Sopenharmony_ci * Search starts from the last successful isolation order or the next 150562306a36Sopenharmony_ci * order to search after a previous failure 150662306a36Sopenharmony_ci */ 150762306a36Sopenharmony_ci cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order); 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci for (order = cc->search_order; 151062306a36Sopenharmony_ci !page && order >= 0; 151162306a36Sopenharmony_ci order = next_search_order(cc, order)) { 151262306a36Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 151362306a36Sopenharmony_ci struct list_head *freelist; 151462306a36Sopenharmony_ci struct page *freepage; 151562306a36Sopenharmony_ci unsigned long flags; 151662306a36Sopenharmony_ci unsigned int order_scanned = 0; 151762306a36Sopenharmony_ci unsigned long high_pfn = 0; 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci if (!area->nr_free) 152062306a36Sopenharmony_ci continue; 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci spin_lock_irqsave(&cc->zone->lock, flags); 152362306a36Sopenharmony_ci freelist = &area->free_list[MIGRATE_MOVABLE]; 152462306a36Sopenharmony_ci list_for_each_entry_reverse(freepage, freelist, buddy_list) { 152562306a36Sopenharmony_ci unsigned long pfn; 152662306a36Sopenharmony_ci 152762306a36Sopenharmony_ci order_scanned++; 152862306a36Sopenharmony_ci nr_scanned++; 152962306a36Sopenharmony_ci pfn = page_to_pfn(freepage); 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci if (pfn >= highest) 153262306a36Sopenharmony_ci highest = max(pageblock_start_pfn(pfn), 153362306a36Sopenharmony_ci cc->zone->zone_start_pfn); 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci if (pfn >= low_pfn) { 153662306a36Sopenharmony_ci cc->fast_search_fail = 0; 153762306a36Sopenharmony_ci cc->search_order = order; 153862306a36Sopenharmony_ci page = freepage; 153962306a36Sopenharmony_ci break; 154062306a36Sopenharmony_ci } 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci if (pfn >= min_pfn && pfn > high_pfn) { 154362306a36Sopenharmony_ci high_pfn = pfn; 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci /* Shorten the scan if a candidate is found */ 154662306a36Sopenharmony_ci limit >>= 1; 154762306a36Sopenharmony_ci } 154862306a36Sopenharmony_ci 154962306a36Sopenharmony_ci if (order_scanned >= limit) 155062306a36Sopenharmony_ci break; 155162306a36Sopenharmony_ci } 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci /* Use a maximum candidate pfn if a preferred one was not found */ 155462306a36Sopenharmony_ci if (!page && high_pfn) { 155562306a36Sopenharmony_ci page = pfn_to_page(high_pfn); 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci /* Update freepage for the list reorder below */ 155862306a36Sopenharmony_ci freepage = page; 155962306a36Sopenharmony_ci } 156062306a36Sopenharmony_ci 156162306a36Sopenharmony_ci /* Reorder to so a future search skips recent pages */ 156262306a36Sopenharmony_ci move_freelist_head(freelist, freepage); 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci /* 
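 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the search window derived above: preferred candidates sit in
 *  the top quarter of the remaining migrate->free space, fallbacks anywhere
 *  in the top half. DEMO_* names and the sample pfns are hypothetical.]
 *
 *	#include <stdio.h>
 *
 *	#define DEMO_PAGEBLOCK_PAGES 512UL
 *
 *	static unsigned long block_start(unsigned long pfn)
 *	{
 *		return pfn & ~(DEMO_PAGEBLOCK_PAGES - 1);
 *	}
 *
 *	int main(void)
 *	{
 *		unsigned long migrate_pfn = 10000, free_pfn = 90000;
 *		unsigned long distance = free_pfn - migrate_pfn;
 *		unsigned long low_pfn = block_start(free_pfn - (distance >> 2));
 *		unsigned long min_pfn = block_start(free_pfn - (distance >> 1));
 *
 *		printf("preferred targets >= %lu, fallbacks >= %lu\n", low_pfn, min_pfn);
 *		return 0;
 *	}
 *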
Isolate the page if available */ 156562306a36Sopenharmony_ci if (page) { 156662306a36Sopenharmony_ci if (__isolate_free_page(page, order)) { 156762306a36Sopenharmony_ci set_page_private(page, order); 156862306a36Sopenharmony_ci nr_isolated = 1 << order; 156962306a36Sopenharmony_ci nr_scanned += nr_isolated - 1; 157062306a36Sopenharmony_ci total_isolated += nr_isolated; 157162306a36Sopenharmony_ci cc->nr_freepages += nr_isolated; 157262306a36Sopenharmony_ci list_add_tail(&page->lru, &cc->freepages); 157362306a36Sopenharmony_ci count_compact_events(COMPACTISOLATED, nr_isolated); 157462306a36Sopenharmony_ci } else { 157562306a36Sopenharmony_ci /* If isolation fails, abort the search */ 157662306a36Sopenharmony_ci order = cc->search_order + 1; 157762306a36Sopenharmony_ci page = NULL; 157862306a36Sopenharmony_ci } 157962306a36Sopenharmony_ci } 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci spin_unlock_irqrestore(&cc->zone->lock, flags); 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci /* Skip fast search if enough freepages isolated */ 158462306a36Sopenharmony_ci if (cc->nr_freepages >= cc->nr_migratepages) 158562306a36Sopenharmony_ci break; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci /* 158862306a36Sopenharmony_ci * Smaller scan on next order so the total scan is related 158962306a36Sopenharmony_ci * to freelist_scan_limit. 159062306a36Sopenharmony_ci */ 159162306a36Sopenharmony_ci if (order_scanned >= limit) 159262306a36Sopenharmony_ci limit = max(1U, limit >> 1); 159362306a36Sopenharmony_ci } 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci trace_mm_compaction_fast_isolate_freepages(min_pfn, cc->free_pfn, 159662306a36Sopenharmony_ci nr_scanned, total_isolated); 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci if (!page) { 159962306a36Sopenharmony_ci cc->fast_search_fail++; 160062306a36Sopenharmony_ci if (scan_start) { 160162306a36Sopenharmony_ci /* 160262306a36Sopenharmony_ci * Use the highest PFN found above min. If one was 160362306a36Sopenharmony_ci * not found, be pessimistic for direct compaction 160462306a36Sopenharmony_ci * and use the min mark. 
160562306a36Sopenharmony_ci */ 160662306a36Sopenharmony_ci if (highest >= min_pfn) { 160762306a36Sopenharmony_ci page = pfn_to_page(highest); 160862306a36Sopenharmony_ci cc->free_pfn = highest; 160962306a36Sopenharmony_ci } else { 161062306a36Sopenharmony_ci if (cc->direct_compaction && pfn_valid(min_pfn)) { 161162306a36Sopenharmony_ci page = pageblock_pfn_to_page(min_pfn, 161262306a36Sopenharmony_ci min(pageblock_end_pfn(min_pfn), 161362306a36Sopenharmony_ci zone_end_pfn(cc->zone)), 161462306a36Sopenharmony_ci cc->zone); 161562306a36Sopenharmony_ci cc->free_pfn = min_pfn; 161662306a36Sopenharmony_ci } 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci } 161962306a36Sopenharmony_ci } 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci if (highest && highest >= cc->zone->compact_cached_free_pfn) { 162262306a36Sopenharmony_ci highest -= pageblock_nr_pages; 162362306a36Sopenharmony_ci cc->zone->compact_cached_free_pfn = highest; 162462306a36Sopenharmony_ci } 162562306a36Sopenharmony_ci 162662306a36Sopenharmony_ci cc->total_free_scanned += nr_scanned; 162762306a36Sopenharmony_ci if (!page) 162862306a36Sopenharmony_ci return; 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci low_pfn = page_to_pfn(page); 163162306a36Sopenharmony_ci fast_isolate_around(cc, low_pfn); 163262306a36Sopenharmony_ci} 163362306a36Sopenharmony_ci 163462306a36Sopenharmony_ci/* 163562306a36Sopenharmony_ci * Based on information in the current compact_control, find blocks 163662306a36Sopenharmony_ci * suitable for isolating free pages from and then isolate them. 163762306a36Sopenharmony_ci */ 163862306a36Sopenharmony_cistatic void isolate_freepages(struct compact_control *cc) 163962306a36Sopenharmony_ci{ 164062306a36Sopenharmony_ci struct zone *zone = cc->zone; 164162306a36Sopenharmony_ci struct page *page; 164262306a36Sopenharmony_ci unsigned long block_start_pfn; /* start of current pageblock */ 164362306a36Sopenharmony_ci unsigned long isolate_start_pfn; /* exact pfn we start at */ 164462306a36Sopenharmony_ci unsigned long block_end_pfn; /* end of current pageblock */ 164562306a36Sopenharmony_ci unsigned long low_pfn; /* lowest pfn scanner is able to scan */ 164662306a36Sopenharmony_ci struct list_head *freelist = &cc->freepages; 164762306a36Sopenharmony_ci unsigned int stride; 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci /* Try a small search of the free lists for a candidate */ 165062306a36Sopenharmony_ci fast_isolate_freepages(cc); 165162306a36Sopenharmony_ci if (cc->nr_freepages) 165262306a36Sopenharmony_ci goto splitmap; 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci /* 165562306a36Sopenharmony_ci * Initialise the free scanner. The starting point is where we last 165662306a36Sopenharmony_ci * successfully isolated from, zone-cached value, or the end of the 165762306a36Sopenharmony_ci * zone when isolating for the first time. For looping we also need 165862306a36Sopenharmony_ci * this pfn aligned down to the pageblock boundary, because we do 165962306a36Sopenharmony_ci * block_start_pfn -= pageblock_nr_pages in the for loop. 166062306a36Sopenharmony_ci * For ending point, take care when isolating in last pageblock of a 166162306a36Sopenharmony_ci * zone which ends in the middle of a pageblock. 166262306a36Sopenharmony_ci * The low boundary is the end of the pageblock the migration scanner 166362306a36Sopenharmony_ci * is using. 
166462306a36Sopenharmony_ci */ 166562306a36Sopenharmony_ci isolate_start_pfn = cc->free_pfn; 166662306a36Sopenharmony_ci block_start_pfn = pageblock_start_pfn(isolate_start_pfn); 166762306a36Sopenharmony_ci block_end_pfn = min(block_start_pfn + pageblock_nr_pages, 166862306a36Sopenharmony_ci zone_end_pfn(zone)); 166962306a36Sopenharmony_ci low_pfn = pageblock_end_pfn(cc->migrate_pfn); 167062306a36Sopenharmony_ci stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1; 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci /* 167362306a36Sopenharmony_ci * Isolate free pages until enough are available to migrate the 167462306a36Sopenharmony_ci * pages on cc->migratepages. We stop searching if the migrate 167562306a36Sopenharmony_ci * and free page scanners meet or enough free pages are isolated. 167662306a36Sopenharmony_ci */ 167762306a36Sopenharmony_ci for (; block_start_pfn >= low_pfn; 167862306a36Sopenharmony_ci block_end_pfn = block_start_pfn, 167962306a36Sopenharmony_ci block_start_pfn -= pageblock_nr_pages, 168062306a36Sopenharmony_ci isolate_start_pfn = block_start_pfn) { 168162306a36Sopenharmony_ci unsigned long nr_isolated; 168262306a36Sopenharmony_ci 168362306a36Sopenharmony_ci /* 168462306a36Sopenharmony_ci * This can iterate a massively long zone without finding any 168562306a36Sopenharmony_ci * suitable migration targets, so periodically check resched. 168662306a36Sopenharmony_ci */ 168762306a36Sopenharmony_ci if (!(block_start_pfn % (COMPACT_CLUSTER_MAX * pageblock_nr_pages))) 168862306a36Sopenharmony_ci cond_resched(); 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn, 169162306a36Sopenharmony_ci zone); 169262306a36Sopenharmony_ci if (!page) { 169362306a36Sopenharmony_ci unsigned long next_pfn; 169462306a36Sopenharmony_ci 169562306a36Sopenharmony_ci next_pfn = skip_offline_sections_reverse(block_start_pfn); 169662306a36Sopenharmony_ci if (next_pfn) 169762306a36Sopenharmony_ci block_start_pfn = max(next_pfn, low_pfn); 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci continue; 170062306a36Sopenharmony_ci } 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci /* Check the block is suitable for migration */ 170362306a36Sopenharmony_ci if (!suitable_migration_target(cc, page)) 170462306a36Sopenharmony_ci continue; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci /* If isolation recently failed, do not retry */ 170762306a36Sopenharmony_ci if (!isolation_suitable(cc, page)) 170862306a36Sopenharmony_ci continue; 170962306a36Sopenharmony_ci 171062306a36Sopenharmony_ci /* Found a block suitable for isolating free pages from. */ 171162306a36Sopenharmony_ci nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn, 171262306a36Sopenharmony_ci block_end_pfn, freelist, stride, false); 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci /* Update the skip hint if the full pageblock was scanned */ 171562306a36Sopenharmony_ci if (isolate_start_pfn == block_end_pfn) 171662306a36Sopenharmony_ci update_pageblock_skip(cc, page, block_start_pfn - 171762306a36Sopenharmony_ci pageblock_nr_pages); 171862306a36Sopenharmony_ci 171962306a36Sopenharmony_ci /* Are enough freepages isolated? */ 172062306a36Sopenharmony_ci if (cc->nr_freepages >= cc->nr_migratepages) { 172162306a36Sopenharmony_ci if (isolate_start_pfn >= block_end_pfn) { 172262306a36Sopenharmony_ci /* 172362306a36Sopenharmony_ci * Restart at previous pageblock if more 172462306a36Sopenharmony_ci * freepages can be isolated next time. 
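 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the free scanner's backward pageblock walk above, including
 *  the periodic reschedule points. DEMO_* constants and pfns are
 *  hypothetical; it assumes low_pfn > 0 so the unsigned step cannot wrap.]
 *
 *	#include <stdio.h>
 *
 *	#define DEMO_PAGEBLOCK_PAGES 512UL
 *	#define DEMO_CLUSTER_MAX 32UL
 *
 *	static void walk_free_blocks(unsigned long free_pfn, unsigned long low_pfn)
 *	{
 *		unsigned long block_start = free_pfn & ~(DEMO_PAGEBLOCK_PAGES - 1);
 *
 *		for (; block_start >= low_pfn; block_start -= DEMO_PAGEBLOCK_PAGES) {
 *			if (!(block_start % (DEMO_CLUSTER_MAX * DEMO_PAGEBLOCK_PAGES)))
 *				printf("  (a real scanner would cond_resched() here)\n");
 *			printf("consider pageblock starting at %lu\n", block_start);
 *		}
 *	}
 *
 *	int main(void)
 *	{
 *		walk_free_blocks(100000, 98000);
 *		return 0;
 *	}
 *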
172562306a36Sopenharmony_ci */ 172662306a36Sopenharmony_ci isolate_start_pfn = 172762306a36Sopenharmony_ci block_start_pfn - pageblock_nr_pages; 172862306a36Sopenharmony_ci } 172962306a36Sopenharmony_ci break; 173062306a36Sopenharmony_ci } else if (isolate_start_pfn < block_end_pfn) { 173162306a36Sopenharmony_ci /* 173262306a36Sopenharmony_ci * If isolation failed early, do not continue 173362306a36Sopenharmony_ci * needlessly. 173462306a36Sopenharmony_ci */ 173562306a36Sopenharmony_ci break; 173662306a36Sopenharmony_ci } 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci /* Adjust stride depending on isolation */ 173962306a36Sopenharmony_ci if (nr_isolated) { 174062306a36Sopenharmony_ci stride = 1; 174162306a36Sopenharmony_ci continue; 174262306a36Sopenharmony_ci } 174362306a36Sopenharmony_ci stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1); 174462306a36Sopenharmony_ci } 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci /* 174762306a36Sopenharmony_ci * Record where the free scanner will restart next time. Either we 174862306a36Sopenharmony_ci * broke from the loop and set isolate_start_pfn based on the last 174962306a36Sopenharmony_ci * call to isolate_freepages_block(), or we met the migration scanner 175062306a36Sopenharmony_ci * and the loop terminated due to isolate_start_pfn < low_pfn 175162306a36Sopenharmony_ci */ 175262306a36Sopenharmony_ci cc->free_pfn = isolate_start_pfn; 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_cisplitmap: 175562306a36Sopenharmony_ci /* __isolate_free_page() does not map the pages */ 175662306a36Sopenharmony_ci split_map_pages(freelist); 175762306a36Sopenharmony_ci} 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci/* 176062306a36Sopenharmony_ci * This is a migrate-callback that "allocates" freepages by taking pages 176162306a36Sopenharmony_ci * from the isolated freelists in the block we are migrating to. 176262306a36Sopenharmony_ci */ 176362306a36Sopenharmony_cistatic struct folio *compaction_alloc(struct folio *src, unsigned long data) 176462306a36Sopenharmony_ci{ 176562306a36Sopenharmony_ci struct compact_control *cc = (struct compact_control *)data; 176662306a36Sopenharmony_ci struct folio *dst; 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_ci if (list_empty(&cc->freepages)) { 176962306a36Sopenharmony_ci isolate_freepages(cc); 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_ci if (list_empty(&cc->freepages)) 177262306a36Sopenharmony_ci return NULL; 177362306a36Sopenharmony_ci } 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci dst = list_entry(cc->freepages.next, struct folio, lru); 177662306a36Sopenharmony_ci list_del(&dst->lru); 177762306a36Sopenharmony_ci cc->nr_freepages--; 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_ci return dst; 178062306a36Sopenharmony_ci} 178162306a36Sopenharmony_ci 178262306a36Sopenharmony_ci/* 178362306a36Sopenharmony_ci * This is a migrate-callback that "frees" freepages back to the isolated 178462306a36Sopenharmony_ci * freelist. All pages on the freelist are from the same zone, so there is no 178562306a36Sopenharmony_ci * special handling needed for NUMA. 
178662306a36Sopenharmony_ci */ 178762306a36Sopenharmony_cistatic void compaction_free(struct folio *dst, unsigned long data) 178862306a36Sopenharmony_ci{ 178962306a36Sopenharmony_ci struct compact_control *cc = (struct compact_control *)data; 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci list_add(&dst->lru, &cc->freepages); 179262306a36Sopenharmony_ci cc->nr_freepages++; 179362306a36Sopenharmony_ci} 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci/* possible outcome of isolate_migratepages */ 179662306a36Sopenharmony_citypedef enum { 179762306a36Sopenharmony_ci ISOLATE_ABORT, /* Abort compaction now */ 179862306a36Sopenharmony_ci ISOLATE_NONE, /* No pages isolated, continue scanning */ 179962306a36Sopenharmony_ci ISOLATE_SUCCESS, /* Pages isolated, migrate */ 180062306a36Sopenharmony_ci} isolate_migrate_t; 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci/* 180362306a36Sopenharmony_ci * Allow userspace to control policy on scanning the unevictable LRU for 180462306a36Sopenharmony_ci * compactable pages. 180562306a36Sopenharmony_ci */ 180662306a36Sopenharmony_cistatic int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT; 180762306a36Sopenharmony_ci/* 180862306a36Sopenharmony_ci * Tunable for proactive compaction. It determines how 180962306a36Sopenharmony_ci * aggressively the kernel should compact memory in the 181062306a36Sopenharmony_ci * background. It takes values in the range [0, 100]. 181162306a36Sopenharmony_ci */ 181262306a36Sopenharmony_cistatic unsigned int __read_mostly sysctl_compaction_proactiveness = 20; 181362306a36Sopenharmony_cistatic int sysctl_extfrag_threshold = 500; 181462306a36Sopenharmony_cistatic int __read_mostly sysctl_compact_memory; 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_cistatic inline void 181762306a36Sopenharmony_ciupdate_fast_start_pfn(struct compact_control *cc, unsigned long pfn) 181862306a36Sopenharmony_ci{ 181962306a36Sopenharmony_ci if (cc->fast_start_pfn == ULONG_MAX) 182062306a36Sopenharmony_ci return; 182162306a36Sopenharmony_ci 182262306a36Sopenharmony_ci if (!cc->fast_start_pfn) 182362306a36Sopenharmony_ci cc->fast_start_pfn = pfn; 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci cc->fast_start_pfn = min(cc->fast_start_pfn, pfn); 182662306a36Sopenharmony_ci} 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_cistatic inline unsigned long 182962306a36Sopenharmony_cireinit_migrate_pfn(struct compact_control *cc) 183062306a36Sopenharmony_ci{ 183162306a36Sopenharmony_ci if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX) 183262306a36Sopenharmony_ci return cc->migrate_pfn; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci cc->migrate_pfn = cc->fast_start_pfn; 183562306a36Sopenharmony_ci cc->fast_start_pfn = ULONG_MAX; 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci return cc->migrate_pfn; 183862306a36Sopenharmony_ci} 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci/* 184162306a36Sopenharmony_ci * Briefly search the free lists for a migration source that already has 184262306a36Sopenharmony_ci * some free pages to reduce the number of pages that need migration 184362306a36Sopenharmony_ci * before a pageblock is free. 
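 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A minimal
 *  standalone analog of the two migrate callbacks above: a destination page
 *  is popped off a prepared free list, and pushed back if it goes unused.
 *  struct demo_page and the helper names are hypothetical.]
 *
 *	#include <stddef.h>
 *	#include <stdio.h>
 *
 *	struct demo_page { struct demo_page *next; int id; };
 *
 *	static struct demo_page *take_dst(struct demo_page **freelist)
 *	{
 *		struct demo_page *dst = *freelist;
 *
 *		if (dst)
 *			*freelist = dst->next;   // like compaction_alloc(): consume one free page
 *		return dst;
 *	}
 *
 *	static void put_dst(struct demo_page **freelist, struct demo_page *dst)
 *	{
 *		dst->next = *freelist;           // like compaction_free(): hand it back
 *		*freelist = dst;
 *	}
 *
 *	int main(void)
 *	{
 *		struct demo_page a = { NULL, 1 }, b = { &a, 2 };
 *		struct demo_page *freelist = &b;
 *		struct demo_page *dst = take_dst(&freelist);
 *
 *		printf("got page %d\n", dst->id);   // 2
 *		put_dst(&freelist, dst);            // pretend migration failed
 *		printf("head is %d again\n", freelist->id);
 *		return 0;
 *	}
 *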
184462306a36Sopenharmony_ci */ 184562306a36Sopenharmony_cistatic unsigned long fast_find_migrateblock(struct compact_control *cc) 184662306a36Sopenharmony_ci{ 184762306a36Sopenharmony_ci unsigned int limit = freelist_scan_limit(cc); 184862306a36Sopenharmony_ci unsigned int nr_scanned = 0; 184962306a36Sopenharmony_ci unsigned long distance; 185062306a36Sopenharmony_ci unsigned long pfn = cc->migrate_pfn; 185162306a36Sopenharmony_ci unsigned long high_pfn; 185262306a36Sopenharmony_ci int order; 185362306a36Sopenharmony_ci bool found_block = false; 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci /* Skip hints are relied on to avoid repeats on the fast search */ 185662306a36Sopenharmony_ci if (cc->ignore_skip_hint) 185762306a36Sopenharmony_ci return pfn; 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci /* 186062306a36Sopenharmony_ci * If the pageblock should be finished then do not select a different 186162306a36Sopenharmony_ci * pageblock. 186262306a36Sopenharmony_ci */ 186362306a36Sopenharmony_ci if (cc->finish_pageblock) 186462306a36Sopenharmony_ci return pfn; 186562306a36Sopenharmony_ci 186662306a36Sopenharmony_ci /* 186762306a36Sopenharmony_ci * If the migrate_pfn is not at the start of a zone or the start 186862306a36Sopenharmony_ci * of a pageblock then assume this is a continuation of a previous 186962306a36Sopenharmony_ci * scan restarted due to COMPACT_CLUSTER_MAX. 187062306a36Sopenharmony_ci */ 187162306a36Sopenharmony_ci if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn)) 187262306a36Sopenharmony_ci return pfn; 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci /* 187562306a36Sopenharmony_ci * For smaller orders, just linearly scan as the number of pages 187662306a36Sopenharmony_ci * to migrate should be relatively small and does not necessarily 187762306a36Sopenharmony_ci * justify freeing up a large block for a small allocation. 187862306a36Sopenharmony_ci */ 187962306a36Sopenharmony_ci if (cc->order <= PAGE_ALLOC_COSTLY_ORDER) 188062306a36Sopenharmony_ci return pfn; 188162306a36Sopenharmony_ci 188262306a36Sopenharmony_ci /* 188362306a36Sopenharmony_ci * Only allow kcompactd and direct requests for movable pages to 188462306a36Sopenharmony_ci * quickly clear out a MOVABLE pageblock for allocation. This 188562306a36Sopenharmony_ci * reduces the risk that a large movable pageblock is freed for 188662306a36Sopenharmony_ci * an unmovable/reclaimable small allocation. 188762306a36Sopenharmony_ci */ 188862306a36Sopenharmony_ci if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE) 188962306a36Sopenharmony_ci return pfn; 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci /* 189262306a36Sopenharmony_ci * When starting the migration scanner, pick any pageblock within the 189362306a36Sopenharmony_ci * first half of the search space. Otherwise try and pick a pageblock 189462306a36Sopenharmony_ci * within the first eighth to reduce the chances that a migration 189562306a36Sopenharmony_ci * target later becomes a source. 
189662306a36Sopenharmony_ci */ 189762306a36Sopenharmony_ci distance = (cc->free_pfn - cc->migrate_pfn) >> 1; 189862306a36Sopenharmony_ci if (cc->migrate_pfn != cc->zone->zone_start_pfn) 189962306a36Sopenharmony_ci distance >>= 2; 190062306a36Sopenharmony_ci high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance); 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci for (order = cc->order - 1; 190362306a36Sopenharmony_ci order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit; 190462306a36Sopenharmony_ci order--) { 190562306a36Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 190662306a36Sopenharmony_ci struct list_head *freelist; 190762306a36Sopenharmony_ci unsigned long flags; 190862306a36Sopenharmony_ci struct page *freepage; 190962306a36Sopenharmony_ci 191062306a36Sopenharmony_ci if (!area->nr_free) 191162306a36Sopenharmony_ci continue; 191262306a36Sopenharmony_ci 191362306a36Sopenharmony_ci spin_lock_irqsave(&cc->zone->lock, flags); 191462306a36Sopenharmony_ci freelist = &area->free_list[MIGRATE_MOVABLE]; 191562306a36Sopenharmony_ci list_for_each_entry(freepage, freelist, buddy_list) { 191662306a36Sopenharmony_ci unsigned long free_pfn; 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci if (nr_scanned++ >= limit) { 191962306a36Sopenharmony_ci move_freelist_tail(freelist, freepage); 192062306a36Sopenharmony_ci break; 192162306a36Sopenharmony_ci } 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_ci free_pfn = page_to_pfn(freepage); 192462306a36Sopenharmony_ci if (free_pfn < high_pfn) { 192562306a36Sopenharmony_ci /* 192662306a36Sopenharmony_ci * Avoid if skipped recently. Ideally it would 192762306a36Sopenharmony_ci * move to the tail but even safe iteration of 192862306a36Sopenharmony_ci * the list assumes an entry is deleted, not 192962306a36Sopenharmony_ci * reordered. 193062306a36Sopenharmony_ci */ 193162306a36Sopenharmony_ci if (get_pageblock_skip(freepage)) 193262306a36Sopenharmony_ci continue; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci /* Reorder to so a future search skips recent pages */ 193562306a36Sopenharmony_ci move_freelist_tail(freelist, freepage); 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci update_fast_start_pfn(cc, free_pfn); 193862306a36Sopenharmony_ci pfn = pageblock_start_pfn(free_pfn); 193962306a36Sopenharmony_ci if (pfn < cc->zone->zone_start_pfn) 194062306a36Sopenharmony_ci pfn = cc->zone->zone_start_pfn; 194162306a36Sopenharmony_ci cc->fast_search_fail = 0; 194262306a36Sopenharmony_ci found_block = true; 194362306a36Sopenharmony_ci break; 194462306a36Sopenharmony_ci } 194562306a36Sopenharmony_ci } 194662306a36Sopenharmony_ci spin_unlock_irqrestore(&cc->zone->lock, flags); 194762306a36Sopenharmony_ci } 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci cc->total_migrate_scanned += nr_scanned; 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci /* 195262306a36Sopenharmony_ci * If fast scanning failed then use a cached entry for a page block 195362306a36Sopenharmony_ci * that had free pages as the basis for starting a linear scan. 
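 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the search ceiling derived above: the fast migrate-block
 *  search covers the first half of the space when starting fresh, only the
 *  first eighth on a continued scan. DEMO_* names and values are
 *  hypothetical.]
 *
 *	#include <stdbool.h>
 *	#include <stdio.h>
 *
 *	#define DEMO_PAGEBLOCK_PAGES 512UL
 *
 *	static unsigned long search_ceiling(unsigned long migrate_pfn,
 *					    unsigned long free_pfn, bool at_zone_start)
 *	{
 *		unsigned long distance = (free_pfn - migrate_pfn) >> 1;
 *
 *		if (!at_zone_start)
 *			distance >>= 2;
 *		return (migrate_pfn + distance) & ~(DEMO_PAGEBLOCK_PAGES - 1);
 *	}
 *
 *	int main(void)
 *	{
 *		printf("%lu\n", search_ceiling(0, 80000, true));    // first half of the space
 *		printf("%lu\n", search_ceiling(0, 80000, false));   // first eighth of the space
 *		return 0;
 *	}
 *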
195462306a36Sopenharmony_ci */ 195562306a36Sopenharmony_ci if (!found_block) { 195662306a36Sopenharmony_ci cc->fast_search_fail++; 195762306a36Sopenharmony_ci pfn = reinit_migrate_pfn(cc); 195862306a36Sopenharmony_ci } 195962306a36Sopenharmony_ci return pfn; 196062306a36Sopenharmony_ci} 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci/* 196362306a36Sopenharmony_ci * Isolate all pages that can be migrated from the first suitable block, 196462306a36Sopenharmony_ci * starting at the block pointed to by the migrate scanner pfn within 196562306a36Sopenharmony_ci * compact_control. 196662306a36Sopenharmony_ci */ 196762306a36Sopenharmony_cistatic isolate_migrate_t isolate_migratepages(struct compact_control *cc) 196862306a36Sopenharmony_ci{ 196962306a36Sopenharmony_ci unsigned long block_start_pfn; 197062306a36Sopenharmony_ci unsigned long block_end_pfn; 197162306a36Sopenharmony_ci unsigned long low_pfn; 197262306a36Sopenharmony_ci struct page *page; 197362306a36Sopenharmony_ci const isolate_mode_t isolate_mode = 197462306a36Sopenharmony_ci (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) | 197562306a36Sopenharmony_ci (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0); 197662306a36Sopenharmony_ci bool fast_find_block; 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci /* 197962306a36Sopenharmony_ci * Start at where we last stopped, or beginning of the zone as 198062306a36Sopenharmony_ci * initialized by compact_zone(). The first failure will use 198162306a36Sopenharmony_ci * the lowest PFN as the starting point for linear scanning. 198262306a36Sopenharmony_ci */ 198362306a36Sopenharmony_ci low_pfn = fast_find_migrateblock(cc); 198462306a36Sopenharmony_ci block_start_pfn = pageblock_start_pfn(low_pfn); 198562306a36Sopenharmony_ci if (block_start_pfn < cc->zone->zone_start_pfn) 198662306a36Sopenharmony_ci block_start_pfn = cc->zone->zone_start_pfn; 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci /* 198962306a36Sopenharmony_ci * fast_find_migrateblock() has already ensured the pageblock is not 199062306a36Sopenharmony_ci * set with a skipped flag, so to avoid the isolation_suitable check 199162306a36Sopenharmony_ci * below again, check whether the fast search was successful. 199262306a36Sopenharmony_ci */ 199362306a36Sopenharmony_ci fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail; 199462306a36Sopenharmony_ci 199562306a36Sopenharmony_ci /* Only scan within a pageblock boundary */ 199662306a36Sopenharmony_ci block_end_pfn = pageblock_end_pfn(low_pfn); 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci /* 199962306a36Sopenharmony_ci * Iterate over whole pageblocks until we find the first suitable. 200062306a36Sopenharmony_ci * Do not cross the free scanner. 200162306a36Sopenharmony_ci */ 200262306a36Sopenharmony_ci for (; block_end_pfn <= cc->free_pfn; 200362306a36Sopenharmony_ci fast_find_block = false, 200462306a36Sopenharmony_ci cc->migrate_pfn = low_pfn = block_end_pfn, 200562306a36Sopenharmony_ci block_start_pfn = block_end_pfn, 200662306a36Sopenharmony_ci block_end_pfn += pageblock_nr_pages) { 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci /* 200962306a36Sopenharmony_ci * This can potentially iterate a massively long zone with 201062306a36Sopenharmony_ci * many pageblocks unsuitable, so periodically check if we 201162306a36Sopenharmony_ci * need to schedule. 
201262306a36Sopenharmony_ci */ 201362306a36Sopenharmony_ci if (!(low_pfn % (COMPACT_CLUSTER_MAX * pageblock_nr_pages))) 201462306a36Sopenharmony_ci cond_resched(); 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci page = pageblock_pfn_to_page(block_start_pfn, 201762306a36Sopenharmony_ci block_end_pfn, cc->zone); 201862306a36Sopenharmony_ci if (!page) { 201962306a36Sopenharmony_ci unsigned long next_pfn; 202062306a36Sopenharmony_ci 202162306a36Sopenharmony_ci next_pfn = skip_offline_sections(block_start_pfn); 202262306a36Sopenharmony_ci if (next_pfn) 202362306a36Sopenharmony_ci block_end_pfn = min(next_pfn, cc->free_pfn); 202462306a36Sopenharmony_ci continue; 202562306a36Sopenharmony_ci } 202662306a36Sopenharmony_ci 202762306a36Sopenharmony_ci /* 202862306a36Sopenharmony_ci * If isolation recently failed, do not retry. Only check the 202962306a36Sopenharmony_ci * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock 203062306a36Sopenharmony_ci * to be visited multiple times. Assume skip was checked 203162306a36Sopenharmony_ci * before making it "skip" so other compaction instances do 203262306a36Sopenharmony_ci * not scan the same block. 203362306a36Sopenharmony_ci */ 203462306a36Sopenharmony_ci if ((pageblock_aligned(low_pfn) || 203562306a36Sopenharmony_ci low_pfn == cc->zone->zone_start_pfn) && 203662306a36Sopenharmony_ci !fast_find_block && !isolation_suitable(cc, page)) 203762306a36Sopenharmony_ci continue; 203862306a36Sopenharmony_ci 203962306a36Sopenharmony_ci /* 204062306a36Sopenharmony_ci * For async direct compaction, only scan the pageblocks of the 204162306a36Sopenharmony_ci * same migratetype without huge pages. Async direct compaction 204262306a36Sopenharmony_ci * is optimistic to see if the minimum amount of work satisfies 204362306a36Sopenharmony_ci * the allocation. The cached PFN is updated as it's possible 204462306a36Sopenharmony_ci * that all remaining blocks between source and target are 204562306a36Sopenharmony_ci * unsuitable and the compaction scanners fail to meet. 204662306a36Sopenharmony_ci */ 204762306a36Sopenharmony_ci if (!suitable_migration_source(cc, page)) { 204862306a36Sopenharmony_ci update_cached_migrate(cc, block_end_pfn); 204962306a36Sopenharmony_ci continue; 205062306a36Sopenharmony_ci } 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_ci /* Perform the isolation */ 205362306a36Sopenharmony_ci if (isolate_migratepages_block(cc, low_pfn, block_end_pfn, 205462306a36Sopenharmony_ci isolate_mode)) 205562306a36Sopenharmony_ci return ISOLATE_ABORT; 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci /* 205862306a36Sopenharmony_ci * Either we isolated something and proceed with migration. Or 205962306a36Sopenharmony_ci * we failed and compact_zone should decide if we should 206062306a36Sopenharmony_ci * continue or not. 206162306a36Sopenharmony_ci */ 206262306a36Sopenharmony_ci break; 206362306a36Sopenharmony_ci } 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci return cc->nr_migratepages ? 
ISOLATE_SUCCESS : ISOLATE_NONE; 206662306a36Sopenharmony_ci} 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci/* 206962306a36Sopenharmony_ci * order == -1 is expected when compacting via 207062306a36Sopenharmony_ci * /proc/sys/vm/compact_memory 207162306a36Sopenharmony_ci */ 207262306a36Sopenharmony_cistatic inline bool is_via_compact_memory(int order) 207362306a36Sopenharmony_ci{ 207462306a36Sopenharmony_ci return order == -1; 207562306a36Sopenharmony_ci} 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci/* 207862306a36Sopenharmony_ci * Determine whether kswapd is (or recently was!) running on this node. 207962306a36Sopenharmony_ci * 208062306a36Sopenharmony_ci * pgdat_kswapd_lock() pins pgdat->kswapd, so a concurrent kswapd_stop() can't 208162306a36Sopenharmony_ci * zero it. 208262306a36Sopenharmony_ci */ 208362306a36Sopenharmony_cistatic bool kswapd_is_running(pg_data_t *pgdat) 208462306a36Sopenharmony_ci{ 208562306a36Sopenharmony_ci bool running; 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci pgdat_kswapd_lock(pgdat); 208862306a36Sopenharmony_ci running = pgdat->kswapd && task_is_running(pgdat->kswapd); 208962306a36Sopenharmony_ci pgdat_kswapd_unlock(pgdat); 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci return running; 209262306a36Sopenharmony_ci} 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci/* 209562306a36Sopenharmony_ci * A zone's fragmentation score is the external fragmentation wrt to the 209662306a36Sopenharmony_ci * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100]. 209762306a36Sopenharmony_ci */ 209862306a36Sopenharmony_cistatic unsigned int fragmentation_score_zone(struct zone *zone) 209962306a36Sopenharmony_ci{ 210062306a36Sopenharmony_ci return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); 210162306a36Sopenharmony_ci} 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci/* 210462306a36Sopenharmony_ci * A weighted zone's fragmentation score is the external fragmentation 210562306a36Sopenharmony_ci * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It 210662306a36Sopenharmony_ci * returns a value in the range [0, 100]. 210762306a36Sopenharmony_ci * 210862306a36Sopenharmony_ci * The scaling factor ensures that proactive compaction focuses on larger 210962306a36Sopenharmony_ci * zones like ZONE_NORMAL, rather than smaller, specialized zones like 211062306a36Sopenharmony_ci * ZONE_DMA32. For smaller zones, the score value remains close to zero, 211162306a36Sopenharmony_ci * and thus never exceeds the high threshold for proactive compaction. 211262306a36Sopenharmony_ci */ 211362306a36Sopenharmony_cistatic unsigned int fragmentation_score_zone_weighted(struct zone *zone) 211462306a36Sopenharmony_ci{ 211562306a36Sopenharmony_ci unsigned long score; 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci score = zone->present_pages * fragmentation_score_zone(zone); 211862306a36Sopenharmony_ci return div64_ul(score, zone->zone_pgdat->node_present_pages + 1); 211962306a36Sopenharmony_ci} 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_ci/* 212262306a36Sopenharmony_ci * The per-node proactive (background) compaction process is started by its 212362306a36Sopenharmony_ci * corresponding kcompactd thread when the node's fragmentation score 212462306a36Sopenharmony_ci * exceeds the high threshold. The compaction process remains active till 212562306a36Sopenharmony_ci * the node's score falls below the low threshold, or one of the back-off 212662306a36Sopenharmony_ci * conditions is met. 
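 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the weighting above: a zone's [0, 100] fragmentation score is
 *  scaled by its share of the node, so small zones barely move the node
 *  score. The zone sizes and raw scores below are hypothetical.]
 *
 *	#include <stdio.h>
 *
 *	static unsigned long weighted_score(unsigned long zone_pages,
 *					    unsigned long node_pages,
 *					    unsigned int zone_score)
 *	{
 *		return (zone_pages * zone_score) / (node_pages + 1);
 *	}
 *
 *	int main(void)
 *	{
 *		unsigned long node_pages = 4000000;
 *		unsigned long big = weighted_score(3800000, node_pages, 60);
 *		unsigned long small = weighted_score(200000, node_pages, 90);
 *
 *		printf("node score = %lu (big zone %lu + small zone %lu)\n",
 *		       big + small, big, small);   // the small zone contributes only ~4
 *		return 0;
 *	}
 *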
212762306a36Sopenharmony_ci */ 212862306a36Sopenharmony_cistatic unsigned int fragmentation_score_node(pg_data_t *pgdat) 212962306a36Sopenharmony_ci{ 213062306a36Sopenharmony_ci unsigned int score = 0; 213162306a36Sopenharmony_ci int zoneid; 213262306a36Sopenharmony_ci 213362306a36Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 213462306a36Sopenharmony_ci struct zone *zone; 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 213762306a36Sopenharmony_ci if (!populated_zone(zone)) 213862306a36Sopenharmony_ci continue; 213962306a36Sopenharmony_ci score += fragmentation_score_zone_weighted(zone); 214062306a36Sopenharmony_ci } 214162306a36Sopenharmony_ci 214262306a36Sopenharmony_ci return score; 214362306a36Sopenharmony_ci} 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_cistatic unsigned int fragmentation_score_wmark(bool low) 214662306a36Sopenharmony_ci{ 214762306a36Sopenharmony_ci unsigned int wmark_low; 214862306a36Sopenharmony_ci 214962306a36Sopenharmony_ci /* 215062306a36Sopenharmony_ci * Cap the low watermark to avoid excessive compaction 215162306a36Sopenharmony_ci * activity in case a user sets the proactiveness tunable 215262306a36Sopenharmony_ci * close to 100 (maximum). 215362306a36Sopenharmony_ci */ 215462306a36Sopenharmony_ci wmark_low = max(100U - sysctl_compaction_proactiveness, 5U); 215562306a36Sopenharmony_ci return low ? wmark_low : min(wmark_low + 10, 100U); 215662306a36Sopenharmony_ci} 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_cistatic bool should_proactive_compact_node(pg_data_t *pgdat) 215962306a36Sopenharmony_ci{ 216062306a36Sopenharmony_ci int wmark_high; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat)) 216362306a36Sopenharmony_ci return false; 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci wmark_high = fragmentation_score_wmark(false); 216662306a36Sopenharmony_ci return fragmentation_score_node(pgdat) > wmark_high; 216762306a36Sopenharmony_ci} 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_cistatic enum compact_result __compact_finished(struct compact_control *cc) 217062306a36Sopenharmony_ci{ 217162306a36Sopenharmony_ci unsigned int order; 217262306a36Sopenharmony_ci const int migratetype = cc->migratetype; 217362306a36Sopenharmony_ci int ret; 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci /* Compaction run completes if the migrate and free scanner meet */ 217662306a36Sopenharmony_ci if (compact_scanners_met(cc)) { 217762306a36Sopenharmony_ci /* Let the next compaction start anew. */ 217862306a36Sopenharmony_ci reset_cached_positions(cc->zone); 217962306a36Sopenharmony_ci 218062306a36Sopenharmony_ci /* 218162306a36Sopenharmony_ci * Mark that the PG_migrate_skip information should be cleared 218262306a36Sopenharmony_ci * by kswapd when it goes to sleep. kcompactd does not set the 218362306a36Sopenharmony_ci * flag itself as the decision to be clear should be directly 218462306a36Sopenharmony_ci * based on an allocation request. 
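 *
 * [Editor's illustrative sketch -- not from mm/compaction.c. A standalone
 *  example of the thresholds derived by fragmentation_score_wmark() above:
 *  proactive compaction starts when the node score exceeds the high mark and
 *  keeps going until it falls to the low mark. Sample tunable values below
 *  are hypothetical.]
 *
 *	#include <stdio.h>
 *
 *	static void wmarks(unsigned int proactiveness,
 *			   unsigned int *low, unsigned int *high)
 *	{
 *		*low = 100U - proactiveness > 5U ? 100U - proactiveness : 5U;
 *		*high = *low + 10U < 100U ? *low + 10U : 100U;
 *	}
 *
 *	int main(void)
 *	{
 *		unsigned int samples[] = { 0, 20, 95, 100 };
 *		unsigned int low, high;
 *
 *		for (int i = 0; i < 4; i++) {
 *			wmarks(samples[i], &low, &high);
 *			printf("proactiveness=%3u -> low=%3u high=%3u\n",
 *			       samples[i], low, high);
 *		}
 *		return 0;
 *	}
 *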
218562306a36Sopenharmony_ci */ 218662306a36Sopenharmony_ci if (cc->direct_compaction) 218762306a36Sopenharmony_ci cc->zone->compact_blockskip_flush = true; 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci if (cc->whole_zone) 219062306a36Sopenharmony_ci return COMPACT_COMPLETE; 219162306a36Sopenharmony_ci else 219262306a36Sopenharmony_ci return COMPACT_PARTIAL_SKIPPED; 219362306a36Sopenharmony_ci } 219462306a36Sopenharmony_ci 219562306a36Sopenharmony_ci if (cc->proactive_compaction) { 219662306a36Sopenharmony_ci int score, wmark_low; 219762306a36Sopenharmony_ci pg_data_t *pgdat; 219862306a36Sopenharmony_ci 219962306a36Sopenharmony_ci pgdat = cc->zone->zone_pgdat; 220062306a36Sopenharmony_ci if (kswapd_is_running(pgdat)) 220162306a36Sopenharmony_ci return COMPACT_PARTIAL_SKIPPED; 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_ci score = fragmentation_score_zone(cc->zone); 220462306a36Sopenharmony_ci wmark_low = fragmentation_score_wmark(true); 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci if (score > wmark_low) 220762306a36Sopenharmony_ci ret = COMPACT_CONTINUE; 220862306a36Sopenharmony_ci else 220962306a36Sopenharmony_ci ret = COMPACT_SUCCESS; 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci goto out; 221262306a36Sopenharmony_ci } 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci if (is_via_compact_memory(cc->order)) 221562306a36Sopenharmony_ci return COMPACT_CONTINUE; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci /* 221862306a36Sopenharmony_ci * Always finish scanning a pageblock to reduce the possibility of 221962306a36Sopenharmony_ci * fallbacks in the future. This is particularly important when 222062306a36Sopenharmony_ci * migration source is unmovable/reclaimable but it's not worth 222162306a36Sopenharmony_ci * special casing. 222262306a36Sopenharmony_ci */ 222362306a36Sopenharmony_ci if (!pageblock_aligned(cc->migrate_pfn)) 222462306a36Sopenharmony_ci return COMPACT_CONTINUE; 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci /* Direct compactor: Is a suitable page free? */ 222762306a36Sopenharmony_ci ret = COMPACT_NO_SUITABLE_PAGE; 222862306a36Sopenharmony_ci for (order = cc->order; order <= MAX_ORDER; order++) { 222962306a36Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 223062306a36Sopenharmony_ci bool can_steal; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci /* Job done if page is free of the right migratetype */ 223362306a36Sopenharmony_ci if (!free_area_empty(area, migratetype)) 223462306a36Sopenharmony_ci return COMPACT_SUCCESS; 223562306a36Sopenharmony_ci 223662306a36Sopenharmony_ci#ifdef CONFIG_CMA 223762306a36Sopenharmony_ci /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ 223862306a36Sopenharmony_ci if (migratetype == get_cma_migratetype() && 223962306a36Sopenharmony_ci !free_area_empty(area, MIGRATE_CMA)) 224062306a36Sopenharmony_ci return COMPACT_SUCCESS; 224162306a36Sopenharmony_ci#endif 224262306a36Sopenharmony_ci /* 224362306a36Sopenharmony_ci * Job done if allocation would steal freepages from 224462306a36Sopenharmony_ci * other migratetype buddy lists. 224562306a36Sopenharmony_ci */ 224662306a36Sopenharmony_ci if (find_suitable_fallback(area, order, migratetype, 224762306a36Sopenharmony_ci true, &can_steal) != -1) 224862306a36Sopenharmony_ci /* 224962306a36Sopenharmony_ci * Movable pages are OK in any pageblock. 
If we are 225062306a36Sopenharmony_ci * stealing for a non-movable allocation, make sure 225162306a36Sopenharmony_ci * we finish compacting the current pageblock first 225262306a36Sopenharmony_ci * (which is assured by the above migrate_pfn align 225362306a36Sopenharmony_ci * check) so it is as free as possible and we won't 225462306a36Sopenharmony_ci * have to steal another one soon. 225562306a36Sopenharmony_ci */ 225662306a36Sopenharmony_ci return COMPACT_SUCCESS; 225762306a36Sopenharmony_ci } 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ciout: 226062306a36Sopenharmony_ci if (cc->contended || fatal_signal_pending(current)) 226162306a36Sopenharmony_ci ret = COMPACT_CONTENDED; 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci return ret; 226462306a36Sopenharmony_ci} 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_cistatic enum compact_result compact_finished(struct compact_control *cc) 226762306a36Sopenharmony_ci{ 226862306a36Sopenharmony_ci int ret; 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci ret = __compact_finished(cc); 227162306a36Sopenharmony_ci trace_mm_compaction_finished(cc->zone, cc->order, ret); 227262306a36Sopenharmony_ci if (ret == COMPACT_NO_SUITABLE_PAGE) 227362306a36Sopenharmony_ci ret = COMPACT_CONTINUE; 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci return ret; 227662306a36Sopenharmony_ci} 227762306a36Sopenharmony_ci 227862306a36Sopenharmony_cistatic bool __compaction_suitable(struct zone *zone, int order, 227962306a36Sopenharmony_ci int highest_zoneidx, 228062306a36Sopenharmony_ci unsigned long wmark_target) 228162306a36Sopenharmony_ci{ 228262306a36Sopenharmony_ci unsigned long watermark; 228362306a36Sopenharmony_ci /* 228462306a36Sopenharmony_ci * Watermarks for order-0 must be met for compaction to be able to 228562306a36Sopenharmony_ci * isolate free pages for migration targets. This means that the 228662306a36Sopenharmony_ci * watermark and alloc_flags have to match, or be more pessimistic than 228762306a36Sopenharmony_ci * the check in __isolate_free_page(). We don't use the direct 228862306a36Sopenharmony_ci * compactor's alloc_flags, as they are not relevant for freepage 228962306a36Sopenharmony_ci * isolation. We however do use the direct compactor's highest_zoneidx 229062306a36Sopenharmony_ci * to skip over zones where lowmem reserves would prevent allocation 229162306a36Sopenharmony_ci * even if compaction succeeds. 229262306a36Sopenharmony_ci * For costly orders, we require low watermark instead of min for 229362306a36Sopenharmony_ci * compaction to proceed to increase its chances. 229462306a36Sopenharmony_ci * ALLOC_CMA is used, as pages in CMA pageblocks are considered 229562306a36Sopenharmony_ci * suitable migration targets 229662306a36Sopenharmony_ci */ 229762306a36Sopenharmony_ci watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? 229862306a36Sopenharmony_ci low_wmark_pages(zone) : min_wmark_pages(zone); 229962306a36Sopenharmony_ci watermark += compact_gap(order); 230062306a36Sopenharmony_ci return __zone_watermark_ok(zone, 0, watermark, highest_zoneidx, 230162306a36Sopenharmony_ci ALLOC_CMA, wmark_target); 230262306a36Sopenharmony_ci} 230362306a36Sopenharmony_ci 230462306a36Sopenharmony_ci/* 230562306a36Sopenharmony_ci * compaction_suitable: Is this suitable to run compaction on this zone now? 
230662306a36Sopenharmony_ci */ 230762306a36Sopenharmony_cibool compaction_suitable(struct zone *zone, int order, int highest_zoneidx) 230862306a36Sopenharmony_ci{ 230962306a36Sopenharmony_ci enum compact_result compact_result; 231062306a36Sopenharmony_ci bool suitable; 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci suitable = __compaction_suitable(zone, order, highest_zoneidx, 231362306a36Sopenharmony_ci zone_page_state(zone, NR_FREE_PAGES)); 231462306a36Sopenharmony_ci /* 231562306a36Sopenharmony_ci * fragmentation index determines if allocation failures are due to 231662306a36Sopenharmony_ci * low memory or external fragmentation 231762306a36Sopenharmony_ci * 231862306a36Sopenharmony_ci * index of -1000 would imply allocations might succeed depending on 231962306a36Sopenharmony_ci * watermarks, but we already failed the high-order watermark check 232062306a36Sopenharmony_ci * index towards 0 implies failure is due to lack of memory 232162306a36Sopenharmony_ci * index towards 1000 implies failure is due to fragmentation 232262306a36Sopenharmony_ci * 232362306a36Sopenharmony_ci * Only compact if a failure would be due to fragmentation. Also 232462306a36Sopenharmony_ci * ignore fragindex for non-costly orders where the alternative to 232562306a36Sopenharmony_ci * a successful reclaim/compaction is OOM. Fragindex and the 232662306a36Sopenharmony_ci * vm.extfrag_threshold sysctl is meant as a heuristic to prevent 232762306a36Sopenharmony_ci * excessive compaction for costly orders, but it should not be at the 232862306a36Sopenharmony_ci * expense of system stability. 232962306a36Sopenharmony_ci */ 233062306a36Sopenharmony_ci if (suitable) { 233162306a36Sopenharmony_ci compact_result = COMPACT_CONTINUE; 233262306a36Sopenharmony_ci if (order > PAGE_ALLOC_COSTLY_ORDER) { 233362306a36Sopenharmony_ci int fragindex = fragmentation_index(zone, order); 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_ci if (fragindex >= 0 && 233662306a36Sopenharmony_ci fragindex <= sysctl_extfrag_threshold) { 233762306a36Sopenharmony_ci suitable = false; 233862306a36Sopenharmony_ci compact_result = COMPACT_NOT_SUITABLE_ZONE; 233962306a36Sopenharmony_ci } 234062306a36Sopenharmony_ci } 234162306a36Sopenharmony_ci } else { 234262306a36Sopenharmony_ci compact_result = COMPACT_SKIPPED; 234362306a36Sopenharmony_ci } 234462306a36Sopenharmony_ci 234562306a36Sopenharmony_ci trace_mm_compaction_suitable(zone, order, compact_result); 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci return suitable; 234862306a36Sopenharmony_ci} 234962306a36Sopenharmony_ci 235062306a36Sopenharmony_cibool compaction_zonelist_suitable(struct alloc_context *ac, int order, 235162306a36Sopenharmony_ci int alloc_flags) 235262306a36Sopenharmony_ci{ 235362306a36Sopenharmony_ci struct zone *zone; 235462306a36Sopenharmony_ci struct zoneref *z; 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* 235762306a36Sopenharmony_ci * Make sure at least one zone would pass __compaction_suitable if we continue 235862306a36Sopenharmony_ci * retrying the reclaim. 
 */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
					ac->highest_zoneidx, ac->nodemask) {
		unsigned long available;

		/*
		 * Do not consider all the reclaimable memory because we do not
		 * want to thrash just for a single high-order allocation which
		 * is not even guaranteed to appear even if __compaction_suitable
		 * is happy about the watermark check.
		 */
		available = zone_reclaimable_pages(zone) / order;
		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
		if (__compaction_suitable(zone, order, ac->highest_zoneidx,
					  available))
			return true;
	}

	return false;
}

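/*
 * Illustrative sketch, not part of the build (kept under "#if 0"): the
 * order-0 headroom test that the two functions above rely on, reduced to
 * plain arithmetic.  __compaction_suitable() picks the min or low watermark
 * depending on whether the order is "costly" and adds a slack of roughly
 * twice the request (compact_gap() in mm/internal.h).  The helper name, the
 * hard-coded costly order of 3 and the 2UL << order gap are assumptions made
 * for this example; the lowmem-reserve and migratetype details handled by
 * __zone_watermark_ok() are ignored here.
 */
#if 0
static bool example_compaction_headroom(unsigned long estimated_free_pages,
					unsigned long min_wmark,
					unsigned long low_wmark,
					unsigned int order)
{
	/* Assumed to mirror PAGE_ALLOC_COSTLY_ORDER. */
	const unsigned int costly_order = 3;
	/* Costly orders use the low watermark to improve their chances. */
	unsigned long watermark = (order > costly_order) ? low_wmark : min_wmark;

	/* Assumed gap: about twice the requested allocation size. */
	watermark += 2UL << order;

	/*
	 * e.g. an order-9 request (2MB THP on x86-64) against a zone whose
	 * low watermark is 10000 pages needs 10000 + (2 << 9) = 11024 free
	 * pages of headroom before compaction is considered worth running.
	 */
	return estimated_free_pages >= watermark;
}
#endif
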
static enum compact_result
compact_zone(struct compact_control *cc, struct capture_control *capc)
{
	enum compact_result ret;
	unsigned long start_pfn = cc->zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(cc->zone);
	unsigned long last_migrated_pfn;
	const bool sync = cc->mode != MIGRATE_ASYNC;
	bool update_cached;
	unsigned int nr_succeeded = 0;

	/*
	 * These counters track activities during zone compaction. Initialize
	 * them before compacting a new zone.
	 */
	cc->total_migrate_scanned = 0;
	cc->total_free_scanned = 0;
	cc->nr_migratepages = 0;
	cc->nr_freepages = 0;
	INIT_LIST_HEAD(&cc->freepages);
	INIT_LIST_HEAD(&cc->migratepages);

	cc->migratetype = gfp_migratetype(cc->gfp_mask);

	if (!is_via_compact_memory(cc->order)) {
		unsigned long watermark;

		/* Allocation can already succeed, nothing to do */
		watermark = wmark_pages(cc->zone,
					cc->alloc_flags & ALLOC_WMARK_MASK);
		if (zone_watermark_ok(cc->zone, cc->order, watermark,
				      cc->highest_zoneidx, cc->alloc_flags))
			return COMPACT_SUCCESS;

		/* Compaction is likely to fail */
		if (!compaction_suitable(cc->zone, cc->order,
					 cc->highest_zoneidx))
			return COMPACT_SKIPPED;
	}

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred.
	 */
	if (compaction_restarting(cc->zone, cc->order))
		__reset_isolation_suitable(cc->zone);

	/*
	 * Set up to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start (unless we explicitly
	 * want to compact the whole zone), but check that it is initialised
	 * by ensuring the values are within zone boundaries.
243262306a36Sopenharmony_ci */ 243362306a36Sopenharmony_ci cc->fast_start_pfn = 0; 243462306a36Sopenharmony_ci if (cc->whole_zone) { 243562306a36Sopenharmony_ci cc->migrate_pfn = start_pfn; 243662306a36Sopenharmony_ci cc->free_pfn = pageblock_start_pfn(end_pfn - 1); 243762306a36Sopenharmony_ci } else { 243862306a36Sopenharmony_ci cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync]; 243962306a36Sopenharmony_ci cc->free_pfn = cc->zone->compact_cached_free_pfn; 244062306a36Sopenharmony_ci if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { 244162306a36Sopenharmony_ci cc->free_pfn = pageblock_start_pfn(end_pfn - 1); 244262306a36Sopenharmony_ci cc->zone->compact_cached_free_pfn = cc->free_pfn; 244362306a36Sopenharmony_ci } 244462306a36Sopenharmony_ci if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { 244562306a36Sopenharmony_ci cc->migrate_pfn = start_pfn; 244662306a36Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; 244762306a36Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn) 245162306a36Sopenharmony_ci cc->whole_zone = true; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci last_migrated_pfn = 0; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci /* 245762306a36Sopenharmony_ci * Migrate has separate cached PFNs for ASYNC and SYNC* migration on 245862306a36Sopenharmony_ci * the basis that some migrations will fail in ASYNC mode. However, 245962306a36Sopenharmony_ci * if the cached PFNs match and pageblocks are skipped due to having 246062306a36Sopenharmony_ci * no isolation candidates, then the sync state does not matter. 246162306a36Sopenharmony_ci * Until a pageblock with isolation candidates is found, keep the 246262306a36Sopenharmony_ci * cached PFNs in sync to avoid revisiting the same blocks. 246362306a36Sopenharmony_ci */ 246462306a36Sopenharmony_ci update_cached = !sync && 246562306a36Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1]; 246662306a36Sopenharmony_ci 246762306a36Sopenharmony_ci trace_mm_compaction_begin(cc, start_pfn, end_pfn, sync); 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci /* lru_add_drain_all could be expensive with involving other CPUs */ 247062306a36Sopenharmony_ci lru_add_drain(); 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_ci while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) { 247362306a36Sopenharmony_ci int err; 247462306a36Sopenharmony_ci unsigned long iteration_start_pfn = cc->migrate_pfn; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci /* 247762306a36Sopenharmony_ci * Avoid multiple rescans of the same pageblock which can 247862306a36Sopenharmony_ci * happen if a page cannot be isolated (dirty/writeback in 247962306a36Sopenharmony_ci * async mode) or if the migrated pages are being allocated 248062306a36Sopenharmony_ci * before the pageblock is cleared. The first rescan will 248162306a36Sopenharmony_ci * capture the entire pageblock for migration. If it fails, 248262306a36Sopenharmony_ci * it'll be marked skip and scanning will proceed as normal. 
248362306a36Sopenharmony_ci */ 248462306a36Sopenharmony_ci cc->finish_pageblock = false; 248562306a36Sopenharmony_ci if (pageblock_start_pfn(last_migrated_pfn) == 248662306a36Sopenharmony_ci pageblock_start_pfn(iteration_start_pfn)) { 248762306a36Sopenharmony_ci cc->finish_pageblock = true; 248862306a36Sopenharmony_ci } 248962306a36Sopenharmony_ci 249062306a36Sopenharmony_cirescan: 249162306a36Sopenharmony_ci switch (isolate_migratepages(cc)) { 249262306a36Sopenharmony_ci case ISOLATE_ABORT: 249362306a36Sopenharmony_ci ret = COMPACT_CONTENDED; 249462306a36Sopenharmony_ci putback_movable_pages(&cc->migratepages); 249562306a36Sopenharmony_ci cc->nr_migratepages = 0; 249662306a36Sopenharmony_ci goto out; 249762306a36Sopenharmony_ci case ISOLATE_NONE: 249862306a36Sopenharmony_ci if (update_cached) { 249962306a36Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[1] = 250062306a36Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0]; 250162306a36Sopenharmony_ci } 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ci /* 250462306a36Sopenharmony_ci * We haven't isolated and migrated anything, but 250562306a36Sopenharmony_ci * there might still be unflushed migrations from 250662306a36Sopenharmony_ci * previous cc->order aligned block. 250762306a36Sopenharmony_ci */ 250862306a36Sopenharmony_ci goto check_drain; 250962306a36Sopenharmony_ci case ISOLATE_SUCCESS: 251062306a36Sopenharmony_ci update_cached = false; 251162306a36Sopenharmony_ci last_migrated_pfn = max(cc->zone->zone_start_pfn, 251262306a36Sopenharmony_ci pageblock_start_pfn(cc->migrate_pfn - 1)); 251362306a36Sopenharmony_ci } 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci err = migrate_pages(&cc->migratepages, compaction_alloc, 251662306a36Sopenharmony_ci compaction_free, (unsigned long)cc, cc->mode, 251762306a36Sopenharmony_ci MR_COMPACTION, &nr_succeeded); 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_ci trace_mm_compaction_migratepages(cc, nr_succeeded); 252062306a36Sopenharmony_ci 252162306a36Sopenharmony_ci /* All pages were either migrated or will be released */ 252262306a36Sopenharmony_ci cc->nr_migratepages = 0; 252362306a36Sopenharmony_ci if (err) { 252462306a36Sopenharmony_ci putback_movable_pages(&cc->migratepages); 252562306a36Sopenharmony_ci /* 252662306a36Sopenharmony_ci * migrate_pages() may return -ENOMEM when scanners meet 252762306a36Sopenharmony_ci * and we want compact_finished() to detect it 252862306a36Sopenharmony_ci */ 252962306a36Sopenharmony_ci if (err == -ENOMEM && !compact_scanners_met(cc)) { 253062306a36Sopenharmony_ci ret = COMPACT_CONTENDED; 253162306a36Sopenharmony_ci goto out; 253262306a36Sopenharmony_ci } 253362306a36Sopenharmony_ci /* 253462306a36Sopenharmony_ci * If an ASYNC or SYNC_LIGHT fails to migrate a page 253562306a36Sopenharmony_ci * within the pageblock_order-aligned block and 253662306a36Sopenharmony_ci * fast_find_migrateblock may be used then scan the 253762306a36Sopenharmony_ci * remainder of the pageblock. This will mark the 253862306a36Sopenharmony_ci * pageblock "skip" to avoid rescanning in the near 253962306a36Sopenharmony_ci * future. This will isolate more pages than necessary 254062306a36Sopenharmony_ci * for the request but avoid loops due to 254162306a36Sopenharmony_ci * fast_find_migrateblock revisiting blocks that were 254262306a36Sopenharmony_ci * recently partially scanned. 
254362306a36Sopenharmony_ci */ 254462306a36Sopenharmony_ci if (!pageblock_aligned(cc->migrate_pfn) && 254562306a36Sopenharmony_ci !cc->ignore_skip_hint && !cc->finish_pageblock && 254662306a36Sopenharmony_ci (cc->mode < MIGRATE_SYNC)) { 254762306a36Sopenharmony_ci cc->finish_pageblock = true; 254862306a36Sopenharmony_ci 254962306a36Sopenharmony_ci /* 255062306a36Sopenharmony_ci * Draining pcplists does not help THP if 255162306a36Sopenharmony_ci * any page failed to migrate. Even after 255262306a36Sopenharmony_ci * drain, the pageblock will not be free. 255362306a36Sopenharmony_ci */ 255462306a36Sopenharmony_ci if (cc->order == COMPACTION_HPAGE_ORDER) 255562306a36Sopenharmony_ci last_migrated_pfn = 0; 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci goto rescan; 255862306a36Sopenharmony_ci } 255962306a36Sopenharmony_ci } 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci /* Stop if a page has been captured */ 256262306a36Sopenharmony_ci if (capc && capc->page) { 256362306a36Sopenharmony_ci ret = COMPACT_SUCCESS; 256462306a36Sopenharmony_ci break; 256562306a36Sopenharmony_ci } 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_cicheck_drain: 256862306a36Sopenharmony_ci /* 256962306a36Sopenharmony_ci * Has the migration scanner moved away from the previous 257062306a36Sopenharmony_ci * cc->order aligned block where we migrated from? If yes, 257162306a36Sopenharmony_ci * flush the pages that were freed, so that they can merge and 257262306a36Sopenharmony_ci * compact_finished() can detect immediately if allocation 257362306a36Sopenharmony_ci * would succeed. 257462306a36Sopenharmony_ci */ 257562306a36Sopenharmony_ci if (cc->order > 0 && last_migrated_pfn) { 257662306a36Sopenharmony_ci unsigned long current_block_start = 257762306a36Sopenharmony_ci block_start_pfn(cc->migrate_pfn, cc->order); 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci if (last_migrated_pfn < current_block_start) { 258062306a36Sopenharmony_ci lru_add_drain_cpu_zone(cc->zone); 258162306a36Sopenharmony_ci /* No more flushing until we migrate again */ 258262306a36Sopenharmony_ci last_migrated_pfn = 0; 258362306a36Sopenharmony_ci } 258462306a36Sopenharmony_ci } 258562306a36Sopenharmony_ci } 258662306a36Sopenharmony_ci 258762306a36Sopenharmony_ciout: 258862306a36Sopenharmony_ci /* 258962306a36Sopenharmony_ci * Release free pages and update where the free scanner should restart, 259062306a36Sopenharmony_ci * so we don't leave any returned pages behind in the next attempt. 259162306a36Sopenharmony_ci */ 259262306a36Sopenharmony_ci if (cc->nr_freepages > 0) { 259362306a36Sopenharmony_ci unsigned long free_pfn = release_freepages(&cc->freepages); 259462306a36Sopenharmony_ci 259562306a36Sopenharmony_ci cc->nr_freepages = 0; 259662306a36Sopenharmony_ci VM_BUG_ON(free_pfn == 0); 259762306a36Sopenharmony_ci /* The cached pfn is always the first in a pageblock */ 259862306a36Sopenharmony_ci free_pfn = pageblock_start_pfn(free_pfn); 259962306a36Sopenharmony_ci /* 260062306a36Sopenharmony_ci * Only go back, not forward. 
The cached pfn might have been 260162306a36Sopenharmony_ci * already reset to zone end in compact_finished() 260262306a36Sopenharmony_ci */ 260362306a36Sopenharmony_ci if (free_pfn > cc->zone->compact_cached_free_pfn) 260462306a36Sopenharmony_ci cc->zone->compact_cached_free_pfn = free_pfn; 260562306a36Sopenharmony_ci } 260662306a36Sopenharmony_ci 260762306a36Sopenharmony_ci count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned); 260862306a36Sopenharmony_ci count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned); 260962306a36Sopenharmony_ci 261062306a36Sopenharmony_ci trace_mm_compaction_end(cc, start_pfn, end_pfn, sync, ret); 261162306a36Sopenharmony_ci 261262306a36Sopenharmony_ci VM_BUG_ON(!list_empty(&cc->freepages)); 261362306a36Sopenharmony_ci VM_BUG_ON(!list_empty(&cc->migratepages)); 261462306a36Sopenharmony_ci 261562306a36Sopenharmony_ci return ret; 261662306a36Sopenharmony_ci} 261762306a36Sopenharmony_ci 261862306a36Sopenharmony_cistatic enum compact_result compact_zone_order(struct zone *zone, int order, 261962306a36Sopenharmony_ci gfp_t gfp_mask, enum compact_priority prio, 262062306a36Sopenharmony_ci unsigned int alloc_flags, int highest_zoneidx, 262162306a36Sopenharmony_ci struct page **capture) 262262306a36Sopenharmony_ci{ 262362306a36Sopenharmony_ci enum compact_result ret; 262462306a36Sopenharmony_ci struct compact_control cc = { 262562306a36Sopenharmony_ci .order = order, 262662306a36Sopenharmony_ci .search_order = order, 262762306a36Sopenharmony_ci .gfp_mask = gfp_mask, 262862306a36Sopenharmony_ci .zone = zone, 262962306a36Sopenharmony_ci .mode = (prio == COMPACT_PRIO_ASYNC) ? 263062306a36Sopenharmony_ci MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT, 263162306a36Sopenharmony_ci .alloc_flags = alloc_flags, 263262306a36Sopenharmony_ci .highest_zoneidx = highest_zoneidx, 263362306a36Sopenharmony_ci .direct_compaction = true, 263462306a36Sopenharmony_ci .whole_zone = (prio == MIN_COMPACT_PRIORITY), 263562306a36Sopenharmony_ci .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY), 263662306a36Sopenharmony_ci .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY) 263762306a36Sopenharmony_ci }; 263862306a36Sopenharmony_ci struct capture_control capc = { 263962306a36Sopenharmony_ci .cc = &cc, 264062306a36Sopenharmony_ci .page = NULL, 264162306a36Sopenharmony_ci }; 264262306a36Sopenharmony_ci 264362306a36Sopenharmony_ci /* 264462306a36Sopenharmony_ci * Make sure the structs are really initialized before we expose the 264562306a36Sopenharmony_ci * capture control, in case we are interrupted and the interrupt handler 264662306a36Sopenharmony_ci * frees a page. 264762306a36Sopenharmony_ci */ 264862306a36Sopenharmony_ci barrier(); 264962306a36Sopenharmony_ci WRITE_ONCE(current->capture_control, &capc); 265062306a36Sopenharmony_ci 265162306a36Sopenharmony_ci ret = compact_zone(&cc, &capc); 265262306a36Sopenharmony_ci 265362306a36Sopenharmony_ci /* 265462306a36Sopenharmony_ci * Make sure we hide capture control first before we read the captured 265562306a36Sopenharmony_ci * page pointer, otherwise an interrupt could free and capture a page 265662306a36Sopenharmony_ci * and we would leak it. 265762306a36Sopenharmony_ci */ 265862306a36Sopenharmony_ci WRITE_ONCE(current->capture_control, NULL); 265962306a36Sopenharmony_ci *capture = READ_ONCE(capc.page); 266062306a36Sopenharmony_ci /* 266162306a36Sopenharmony_ci * Technically, it is also possible that compaction is skipped but 266262306a36Sopenharmony_ci * the page is still captured out of luck(IRQ came and freed the page). 
266362306a36Sopenharmony_ci * Returning COMPACT_SUCCESS in such cases helps in properly accounting 266462306a36Sopenharmony_ci * the COMPACT[STALL|FAIL] when compaction is skipped. 266562306a36Sopenharmony_ci */ 266662306a36Sopenharmony_ci if (*capture) 266762306a36Sopenharmony_ci ret = COMPACT_SUCCESS; 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci return ret; 267062306a36Sopenharmony_ci} 267162306a36Sopenharmony_ci 267262306a36Sopenharmony_ci/** 267362306a36Sopenharmony_ci * try_to_compact_pages - Direct compact to satisfy a high-order allocation 267462306a36Sopenharmony_ci * @gfp_mask: The GFP mask of the current allocation 267562306a36Sopenharmony_ci * @order: The order of the current allocation 267662306a36Sopenharmony_ci * @alloc_flags: The allocation flags of the current allocation 267762306a36Sopenharmony_ci * @ac: The context of current allocation 267862306a36Sopenharmony_ci * @prio: Determines how hard direct compaction should try to succeed 267962306a36Sopenharmony_ci * @capture: Pointer to free page created by compaction will be stored here 268062306a36Sopenharmony_ci * 268162306a36Sopenharmony_ci * This is the main entry point for direct page compaction. 268262306a36Sopenharmony_ci */ 268362306a36Sopenharmony_cienum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, 268462306a36Sopenharmony_ci unsigned int alloc_flags, const struct alloc_context *ac, 268562306a36Sopenharmony_ci enum compact_priority prio, struct page **capture) 268662306a36Sopenharmony_ci{ 268762306a36Sopenharmony_ci int may_perform_io = (__force int)(gfp_mask & __GFP_IO); 268862306a36Sopenharmony_ci struct zoneref *z; 268962306a36Sopenharmony_ci struct zone *zone; 269062306a36Sopenharmony_ci enum compact_result rc = COMPACT_SKIPPED; 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci /* 269362306a36Sopenharmony_ci * Check if the GFP flags allow compaction - GFP_NOIO is really 269462306a36Sopenharmony_ci * tricky context because the migration might require IO 269562306a36Sopenharmony_ci */ 269662306a36Sopenharmony_ci if (!may_perform_io) 269762306a36Sopenharmony_ci return COMPACT_SKIPPED; 269862306a36Sopenharmony_ci 269962306a36Sopenharmony_ci trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); 270062306a36Sopenharmony_ci 270162306a36Sopenharmony_ci /* Compact each zone in the list */ 270262306a36Sopenharmony_ci for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, 270362306a36Sopenharmony_ci ac->highest_zoneidx, ac->nodemask) { 270462306a36Sopenharmony_ci enum compact_result status; 270562306a36Sopenharmony_ci 270662306a36Sopenharmony_ci if (prio > MIN_COMPACT_PRIORITY 270762306a36Sopenharmony_ci && compaction_deferred(zone, order)) { 270862306a36Sopenharmony_ci rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); 270962306a36Sopenharmony_ci continue; 271062306a36Sopenharmony_ci } 271162306a36Sopenharmony_ci 271262306a36Sopenharmony_ci status = compact_zone_order(zone, order, gfp_mask, prio, 271362306a36Sopenharmony_ci alloc_flags, ac->highest_zoneidx, capture); 271462306a36Sopenharmony_ci rc = max(status, rc); 271562306a36Sopenharmony_ci 271662306a36Sopenharmony_ci /* The allocation should succeed, stop compacting */ 271762306a36Sopenharmony_ci if (status == COMPACT_SUCCESS) { 271862306a36Sopenharmony_ci /* 271962306a36Sopenharmony_ci * We think the allocation will succeed in this zone, 272062306a36Sopenharmony_ci * but it is not certain, hence the false. 
The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			break;
		}

		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
					status == COMPACT_PARTIAL_SKIPPED))
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones.
		 */
		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
					|| fatal_signal_pending(current))
			break;
	}

	return rc;
}

/*
 * Compact all zones within a node until each zone's fragmentation score
 * falls within the proactive compaction thresholds (as determined by the
 * proactiveness tunable).
 *
 * It is possible that the function returns before reaching the score targets
 * due to various back-off conditions, such as contention on per-node or
 * per-zone locks.
275962306a36Sopenharmony_ci */ 276062306a36Sopenharmony_cistatic void proactive_compact_node(pg_data_t *pgdat) 276162306a36Sopenharmony_ci{ 276262306a36Sopenharmony_ci int zoneid; 276362306a36Sopenharmony_ci struct zone *zone; 276462306a36Sopenharmony_ci struct compact_control cc = { 276562306a36Sopenharmony_ci .order = -1, 276662306a36Sopenharmony_ci .mode = MIGRATE_SYNC_LIGHT, 276762306a36Sopenharmony_ci .ignore_skip_hint = true, 276862306a36Sopenharmony_ci .whole_zone = true, 276962306a36Sopenharmony_ci .gfp_mask = GFP_KERNEL, 277062306a36Sopenharmony_ci .proactive_compaction = true, 277162306a36Sopenharmony_ci }; 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 277462306a36Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 277562306a36Sopenharmony_ci if (!populated_zone(zone)) 277662306a36Sopenharmony_ci continue; 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_ci cc.zone = zone; 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci compact_zone(&cc, NULL); 278162306a36Sopenharmony_ci 278262306a36Sopenharmony_ci count_compact_events(KCOMPACTD_MIGRATE_SCANNED, 278362306a36Sopenharmony_ci cc.total_migrate_scanned); 278462306a36Sopenharmony_ci count_compact_events(KCOMPACTD_FREE_SCANNED, 278562306a36Sopenharmony_ci cc.total_free_scanned); 278662306a36Sopenharmony_ci } 278762306a36Sopenharmony_ci} 278862306a36Sopenharmony_ci 278962306a36Sopenharmony_ci/* Compact all zones within a node */ 279062306a36Sopenharmony_cistatic void compact_node(int nid) 279162306a36Sopenharmony_ci{ 279262306a36Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 279362306a36Sopenharmony_ci int zoneid; 279462306a36Sopenharmony_ci struct zone *zone; 279562306a36Sopenharmony_ci struct compact_control cc = { 279662306a36Sopenharmony_ci .order = -1, 279762306a36Sopenharmony_ci .mode = MIGRATE_SYNC, 279862306a36Sopenharmony_ci .ignore_skip_hint = true, 279962306a36Sopenharmony_ci .whole_zone = true, 280062306a36Sopenharmony_ci .gfp_mask = GFP_KERNEL, 280162306a36Sopenharmony_ci }; 280262306a36Sopenharmony_ci 280362306a36Sopenharmony_ci 280462306a36Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 280762306a36Sopenharmony_ci if (!populated_zone(zone)) 280862306a36Sopenharmony_ci continue; 280962306a36Sopenharmony_ci 281062306a36Sopenharmony_ci cc.zone = zone; 281162306a36Sopenharmony_ci 281262306a36Sopenharmony_ci compact_zone(&cc, NULL); 281362306a36Sopenharmony_ci } 281462306a36Sopenharmony_ci} 281562306a36Sopenharmony_ci 281662306a36Sopenharmony_ci/* Compact all nodes in the system */ 281762306a36Sopenharmony_cistatic void compact_nodes(void) 281862306a36Sopenharmony_ci{ 281962306a36Sopenharmony_ci int nid; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci /* Flush pending updates to the LRU lists */ 282262306a36Sopenharmony_ci lru_add_drain_all(); 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci for_each_online_node(nid) 282562306a36Sopenharmony_ci compact_node(nid); 282662306a36Sopenharmony_ci} 282762306a36Sopenharmony_ci 282862306a36Sopenharmony_cistatic int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, 282962306a36Sopenharmony_ci void *buffer, size_t *length, loff_t *ppos) 283062306a36Sopenharmony_ci{ 283162306a36Sopenharmony_ci int rc, nid; 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci rc = proc_dointvec_minmax(table, write, buffer, length, ppos); 283462306a36Sopenharmony_ci if (rc) 
283562306a36Sopenharmony_ci return rc; 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci if (write && sysctl_compaction_proactiveness) { 283862306a36Sopenharmony_ci for_each_online_node(nid) { 283962306a36Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 284062306a36Sopenharmony_ci 284162306a36Sopenharmony_ci if (pgdat->proactive_compact_trigger) 284262306a36Sopenharmony_ci continue; 284362306a36Sopenharmony_ci 284462306a36Sopenharmony_ci pgdat->proactive_compact_trigger = true; 284562306a36Sopenharmony_ci trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, -1, 284662306a36Sopenharmony_ci pgdat->nr_zones - 1); 284762306a36Sopenharmony_ci wake_up_interruptible(&pgdat->kcompactd_wait); 284862306a36Sopenharmony_ci } 284962306a36Sopenharmony_ci } 285062306a36Sopenharmony_ci 285162306a36Sopenharmony_ci return 0; 285262306a36Sopenharmony_ci} 285362306a36Sopenharmony_ci 285462306a36Sopenharmony_ci/* 285562306a36Sopenharmony_ci * This is the entry point for compacting all nodes via 285662306a36Sopenharmony_ci * /proc/sys/vm/compact_memory 285762306a36Sopenharmony_ci */ 285862306a36Sopenharmony_cistatic int sysctl_compaction_handler(struct ctl_table *table, int write, 285962306a36Sopenharmony_ci void *buffer, size_t *length, loff_t *ppos) 286062306a36Sopenharmony_ci{ 286162306a36Sopenharmony_ci int ret; 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_ci ret = proc_dointvec(table, write, buffer, length, ppos); 286462306a36Sopenharmony_ci if (ret) 286562306a36Sopenharmony_ci return ret; 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci if (sysctl_compact_memory != 1) 286862306a36Sopenharmony_ci return -EINVAL; 286962306a36Sopenharmony_ci 287062306a36Sopenharmony_ci if (write) 287162306a36Sopenharmony_ci compact_nodes(); 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci return 0; 287462306a36Sopenharmony_ci} 287562306a36Sopenharmony_ci 287662306a36Sopenharmony_ci#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) 287762306a36Sopenharmony_cistatic ssize_t compact_store(struct device *dev, 287862306a36Sopenharmony_ci struct device_attribute *attr, 287962306a36Sopenharmony_ci const char *buf, size_t count) 288062306a36Sopenharmony_ci{ 288162306a36Sopenharmony_ci int nid = dev->id; 288262306a36Sopenharmony_ci 288362306a36Sopenharmony_ci if (nid >= 0 && nid < nr_node_ids && node_online(nid)) { 288462306a36Sopenharmony_ci /* Flush pending updates to the LRU lists */ 288562306a36Sopenharmony_ci lru_add_drain_all(); 288662306a36Sopenharmony_ci 288762306a36Sopenharmony_ci compact_node(nid); 288862306a36Sopenharmony_ci } 288962306a36Sopenharmony_ci 289062306a36Sopenharmony_ci return count; 289162306a36Sopenharmony_ci} 289262306a36Sopenharmony_cistatic DEVICE_ATTR_WO(compact); 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ciint compaction_register_node(struct node *node) 289562306a36Sopenharmony_ci{ 289662306a36Sopenharmony_ci return device_create_file(&node->dev, &dev_attr_compact); 289762306a36Sopenharmony_ci} 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_civoid compaction_unregister_node(struct node *node) 290062306a36Sopenharmony_ci{ 290162306a36Sopenharmony_ci device_remove_file(&node->dev, &dev_attr_compact); 290262306a36Sopenharmony_ci} 290362306a36Sopenharmony_ci#endif /* CONFIG_SYSFS && CONFIG_NUMA */ 290462306a36Sopenharmony_ci 290562306a36Sopenharmony_cistatic inline bool kcompactd_work_requested(pg_data_t *pgdat) 290662306a36Sopenharmony_ci{ 290762306a36Sopenharmony_ci return pgdat->kcompactd_max_order > 0 || kthread_should_stop() || 290862306a36Sopenharmony_ci 
pgdat->proactive_compact_trigger; 290962306a36Sopenharmony_ci} 291062306a36Sopenharmony_ci 291162306a36Sopenharmony_cistatic bool kcompactd_node_suitable(pg_data_t *pgdat) 291262306a36Sopenharmony_ci{ 291362306a36Sopenharmony_ci int zoneid; 291462306a36Sopenharmony_ci struct zone *zone; 291562306a36Sopenharmony_ci enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx; 291662306a36Sopenharmony_ci 291762306a36Sopenharmony_ci for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) { 291862306a36Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 291962306a36Sopenharmony_ci 292062306a36Sopenharmony_ci if (!populated_zone(zone)) 292162306a36Sopenharmony_ci continue; 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_ci /* Allocation can already succeed, check other zones */ 292462306a36Sopenharmony_ci if (zone_watermark_ok(zone, pgdat->kcompactd_max_order, 292562306a36Sopenharmony_ci min_wmark_pages(zone), 292662306a36Sopenharmony_ci highest_zoneidx, 0)) 292762306a36Sopenharmony_ci continue; 292862306a36Sopenharmony_ci 292962306a36Sopenharmony_ci if (compaction_suitable(zone, pgdat->kcompactd_max_order, 293062306a36Sopenharmony_ci highest_zoneidx)) 293162306a36Sopenharmony_ci return true; 293262306a36Sopenharmony_ci } 293362306a36Sopenharmony_ci 293462306a36Sopenharmony_ci return false; 293562306a36Sopenharmony_ci} 293662306a36Sopenharmony_ci 293762306a36Sopenharmony_cistatic void kcompactd_do_work(pg_data_t *pgdat) 293862306a36Sopenharmony_ci{ 293962306a36Sopenharmony_ci /* 294062306a36Sopenharmony_ci * With no special task, compact all zones so that a page of requested 294162306a36Sopenharmony_ci * order is allocatable. 294262306a36Sopenharmony_ci */ 294362306a36Sopenharmony_ci int zoneid; 294462306a36Sopenharmony_ci struct zone *zone; 294562306a36Sopenharmony_ci struct compact_control cc = { 294662306a36Sopenharmony_ci .order = pgdat->kcompactd_max_order, 294762306a36Sopenharmony_ci .search_order = pgdat->kcompactd_max_order, 294862306a36Sopenharmony_ci .highest_zoneidx = pgdat->kcompactd_highest_zoneidx, 294962306a36Sopenharmony_ci .mode = MIGRATE_SYNC_LIGHT, 295062306a36Sopenharmony_ci .ignore_skip_hint = false, 295162306a36Sopenharmony_ci .gfp_mask = GFP_KERNEL, 295262306a36Sopenharmony_ci }; 295362306a36Sopenharmony_ci trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, 295462306a36Sopenharmony_ci cc.highest_zoneidx); 295562306a36Sopenharmony_ci count_compact_event(KCOMPACTD_WAKE); 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) { 295862306a36Sopenharmony_ci int status; 295962306a36Sopenharmony_ci 296062306a36Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 296162306a36Sopenharmony_ci if (!populated_zone(zone)) 296262306a36Sopenharmony_ci continue; 296362306a36Sopenharmony_ci 296462306a36Sopenharmony_ci if (compaction_deferred(zone, cc.order)) 296562306a36Sopenharmony_ci continue; 296662306a36Sopenharmony_ci 296762306a36Sopenharmony_ci /* Allocation can already succeed, nothing to do */ 296862306a36Sopenharmony_ci if (zone_watermark_ok(zone, cc.order, 296962306a36Sopenharmony_ci min_wmark_pages(zone), zoneid, 0)) 297062306a36Sopenharmony_ci continue; 297162306a36Sopenharmony_ci 297262306a36Sopenharmony_ci if (!compaction_suitable(zone, cc.order, zoneid)) 297362306a36Sopenharmony_ci continue; 297462306a36Sopenharmony_ci 297562306a36Sopenharmony_ci if (kthread_should_stop()) 297662306a36Sopenharmony_ci return; 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci cc.zone = zone; 
297962306a36Sopenharmony_ci status = compact_zone(&cc, NULL); 298062306a36Sopenharmony_ci 298162306a36Sopenharmony_ci if (status == COMPACT_SUCCESS) { 298262306a36Sopenharmony_ci compaction_defer_reset(zone, cc.order, false); 298362306a36Sopenharmony_ci } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { 298462306a36Sopenharmony_ci /* 298562306a36Sopenharmony_ci * Buddy pages may become stranded on pcps that could 298662306a36Sopenharmony_ci * otherwise coalesce on the zone's free area for 298762306a36Sopenharmony_ci * order >= cc.order. This is ratelimited by the 298862306a36Sopenharmony_ci * upcoming deferral. 298962306a36Sopenharmony_ci */ 299062306a36Sopenharmony_ci drain_all_pages(zone); 299162306a36Sopenharmony_ci 299262306a36Sopenharmony_ci /* 299362306a36Sopenharmony_ci * We use sync migration mode here, so we defer like 299462306a36Sopenharmony_ci * sync direct compaction does. 299562306a36Sopenharmony_ci */ 299662306a36Sopenharmony_ci defer_compaction(zone, cc.order); 299762306a36Sopenharmony_ci } 299862306a36Sopenharmony_ci 299962306a36Sopenharmony_ci count_compact_events(KCOMPACTD_MIGRATE_SCANNED, 300062306a36Sopenharmony_ci cc.total_migrate_scanned); 300162306a36Sopenharmony_ci count_compact_events(KCOMPACTD_FREE_SCANNED, 300262306a36Sopenharmony_ci cc.total_free_scanned); 300362306a36Sopenharmony_ci } 300462306a36Sopenharmony_ci 300562306a36Sopenharmony_ci /* 300662306a36Sopenharmony_ci * Regardless of success, we are done until woken up next. But remember 300762306a36Sopenharmony_ci * the requested order/highest_zoneidx in case it was higher/tighter 300862306a36Sopenharmony_ci * than our current ones 300962306a36Sopenharmony_ci */ 301062306a36Sopenharmony_ci if (pgdat->kcompactd_max_order <= cc.order) 301162306a36Sopenharmony_ci pgdat->kcompactd_max_order = 0; 301262306a36Sopenharmony_ci if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx) 301362306a36Sopenharmony_ci pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1; 301462306a36Sopenharmony_ci} 301562306a36Sopenharmony_ci 301662306a36Sopenharmony_civoid wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx) 301762306a36Sopenharmony_ci{ 301862306a36Sopenharmony_ci if (!order) 301962306a36Sopenharmony_ci return; 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_ci if (pgdat->kcompactd_max_order < order) 302262306a36Sopenharmony_ci pgdat->kcompactd_max_order = order; 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx) 302562306a36Sopenharmony_ci pgdat->kcompactd_highest_zoneidx = highest_zoneidx; 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci /* 302862306a36Sopenharmony_ci * Pairs with implicit barrier in wait_event_freezable() 302962306a36Sopenharmony_ci * such that wakeups are not missed. 
 */
	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
		return;

	if (!kcompactd_node_suitable(pgdat))
		return;

	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
					     highest_zoneidx);
	wake_up_interruptible(&pgdat->kcompactd_wait);
}

/*
 * The background compaction daemon, started as a kernel thread
 * from the init process.
 */
static int kcompactd(void *p)
{
	pg_data_t *pgdat = (pg_data_t *)p;
	struct task_struct *tsk = current;
	long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
	long timeout = default_timeout;

	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);

	set_freezable();

	pgdat->kcompactd_max_order = 0;
	pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;

	while (!kthread_should_stop()) {
		unsigned long pflags;

		/*
		 * Avoid the unnecessary wakeup for proactive compaction
		 * when it is disabled.
		 */
		if (!sysctl_compaction_proactiveness)
			timeout = MAX_SCHEDULE_TIMEOUT;
		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
			kcompactd_work_requested(pgdat), timeout) &&
			!pgdat->proactive_compact_trigger) {

			psi_memstall_enter(&pflags);
			kcompactd_do_work(pgdat);
			psi_memstall_leave(&pflags);
			/*
			 * Reset the timeout value. The defer timeout from
			 * proactive compaction is lost here, but that is fine:
			 * if the zone has changed substantially, carrying on
			 * with the previous defer interval is not useful.
			 */
			timeout = default_timeout;
			continue;
		}

		/*
		 * Start the proactive work with default timeout. Based
		 * on the fragmentation score, this timeout is updated.
		 */
		timeout = default_timeout;
		if (should_proactive_compact_node(pgdat)) {
			unsigned int prev_score, score;

			prev_score = fragmentation_score_node(pgdat);
			proactive_compact_node(pgdat);
			score = fragmentation_score_node(pgdat);
			/*
			 * Defer proactive compaction if the fragmentation
			 * score did not go down, i.e. no progress was made.
			 */
			if (unlikely(score >= prev_score))
				timeout =
				   default_timeout << COMPACT_MAX_DEFER_SHIFT;
		}
		if (unlikely(pgdat->proactive_compact_trigger))
			pgdat->proactive_compact_trigger = false;
	}

	return 0;
}
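
/*
 * Illustrative sketch, not part of the build (kept under "#if 0"): how the
 * thresholds consulted by kcompactd above fall out of the proactiveness
 * tunable, plus one worked example of the weighted node score.  The helper
 * name is hypothetical; the authoritative versions are
 * fragmentation_score_wmark() and fragmentation_score_node() earlier in
 * this file.
 */
#if 0
static unsigned int example_proactive_wmark(unsigned int proactiveness, bool low)
{
	/* Mirrors fragmentation_score_wmark(): low = max(100 - p, 5). */
	unsigned int wmark_low = (100U - proactiveness > 5U) ?
				 100U - proactiveness : 5U;

	/* The high threshold sits 10 points above, capped at 100. */
	return low ? wmark_low :
		     (wmark_low + 10 > 100U ? 100U : wmark_low + 10);
}

/*
 * With the default proactiveness of 20 this gives wmark_low = 80 and
 * wmark_high = 90: kcompactd starts proactive work once the node score
 * exceeds 90 and keeps going until it drops back to 80 or a back-off
 * condition is hit.
 *
 * The node score is a zone-size-weighted sum.  For a node with a 1 GiB
 * ZONE_DMA32 that is 90% fragmented and a 15 GiB ZONE_NORMAL that is 40%
 * fragmented:
 *
 *	score = 90 * 1/16 + 40 * 15/16 ~= 5.6 + 37.5 ~= 43
 *
 * so the small but badly fragmented zone barely moves the needle, which is
 * the intent of the weighting in fragmentation_score_zone_weighted().
 */
#endif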

/*
 * This kcompactd start function will be called by init and node-hot-add.
 * On node-hot-add, kcompactd will be moved to the proper CPUs if CPUs are
 * hot-added.
 */
void __meminit kcompactd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);

	if (pgdat->kcompactd)
		return;

	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
	if (IS_ERR(pgdat->kcompactd)) {
		pr_err("Failed to start kcompactd on node %d\n", nid);
		pgdat->kcompactd = NULL;
	}
}

/*
 * Called by memory hotplug when all memory in a node is offlined. Caller must
 * be holding mem_hotplug_begin/done().
 */
void __meminit kcompactd_stop(int nid)
{
	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;

	if (kcompactd) {
		kthread_stop(kcompactd);
		NODE_DATA(nid)->kcompactd = NULL;
	}
}

/*
 * It's optimal to keep kcompactd on the same CPUs as their memory, but
 * not required for correctness. So if the last CPU in a node goes
 * away, we get changed to run anywhere: as the first one comes back,
 * restore their cpu bindings.
315462306a36Sopenharmony_ci */ 315562306a36Sopenharmony_cistatic int kcompactd_cpu_online(unsigned int cpu) 315662306a36Sopenharmony_ci{ 315762306a36Sopenharmony_ci int nid; 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci for_each_node_state(nid, N_MEMORY) { 316062306a36Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 316162306a36Sopenharmony_ci const struct cpumask *mask; 316262306a36Sopenharmony_ci 316362306a36Sopenharmony_ci mask = cpumask_of_node(pgdat->node_id); 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_ci if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) 316662306a36Sopenharmony_ci /* One of our CPUs online: restore mask */ 316762306a36Sopenharmony_ci if (pgdat->kcompactd) 316862306a36Sopenharmony_ci set_cpus_allowed_ptr(pgdat->kcompactd, mask); 316962306a36Sopenharmony_ci } 317062306a36Sopenharmony_ci return 0; 317162306a36Sopenharmony_ci} 317262306a36Sopenharmony_ci 317362306a36Sopenharmony_cistatic int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, 317462306a36Sopenharmony_ci int write, void *buffer, size_t *lenp, loff_t *ppos) 317562306a36Sopenharmony_ci{ 317662306a36Sopenharmony_ci int ret, old; 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write) 317962306a36Sopenharmony_ci return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 318062306a36Sopenharmony_ci 318162306a36Sopenharmony_ci old = *(int *)table->data; 318262306a36Sopenharmony_ci ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 318362306a36Sopenharmony_ci if (ret) 318462306a36Sopenharmony_ci return ret; 318562306a36Sopenharmony_ci if (old != *(int *)table->data) 318662306a36Sopenharmony_ci pr_warn_once("sysctl attribute %s changed by %s[%d]\n", 318762306a36Sopenharmony_ci table->procname, current->comm, 318862306a36Sopenharmony_ci task_pid_nr(current)); 318962306a36Sopenharmony_ci return ret; 319062306a36Sopenharmony_ci} 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_cistatic struct ctl_table vm_compaction[] = { 319362306a36Sopenharmony_ci { 319462306a36Sopenharmony_ci .procname = "compact_memory", 319562306a36Sopenharmony_ci .data = &sysctl_compact_memory, 319662306a36Sopenharmony_ci .maxlen = sizeof(int), 319762306a36Sopenharmony_ci .mode = 0200, 319862306a36Sopenharmony_ci .proc_handler = sysctl_compaction_handler, 319962306a36Sopenharmony_ci }, 320062306a36Sopenharmony_ci { 320162306a36Sopenharmony_ci .procname = "compaction_proactiveness", 320262306a36Sopenharmony_ci .data = &sysctl_compaction_proactiveness, 320362306a36Sopenharmony_ci .maxlen = sizeof(sysctl_compaction_proactiveness), 320462306a36Sopenharmony_ci .mode = 0644, 320562306a36Sopenharmony_ci .proc_handler = compaction_proactiveness_sysctl_handler, 320662306a36Sopenharmony_ci .extra1 = SYSCTL_ZERO, 320762306a36Sopenharmony_ci .extra2 = SYSCTL_ONE_HUNDRED, 320862306a36Sopenharmony_ci }, 320962306a36Sopenharmony_ci { 321062306a36Sopenharmony_ci .procname = "extfrag_threshold", 321162306a36Sopenharmony_ci .data = &sysctl_extfrag_threshold, 321262306a36Sopenharmony_ci .maxlen = sizeof(int), 321362306a36Sopenharmony_ci .mode = 0644, 321462306a36Sopenharmony_ci .proc_handler = proc_dointvec_minmax, 321562306a36Sopenharmony_ci .extra1 = SYSCTL_ZERO, 321662306a36Sopenharmony_ci .extra2 = SYSCTL_ONE_THOUSAND, 321762306a36Sopenharmony_ci }, 321862306a36Sopenharmony_ci { 321962306a36Sopenharmony_ci .procname = "compact_unevictable_allowed", 322062306a36Sopenharmony_ci .data = &sysctl_compact_unevictable_allowed, 322162306a36Sopenharmony_ci .maxlen = 
 sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax_warn_RT_change,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{ }
};

static int __init kcompactd_init(void)
{
	int nid;
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/compaction:online",
					kcompactd_cpu_online, NULL);
	if (ret < 0) {
		pr_err("kcompactd: failed to register hotplug callbacks.\n");
		return ret;
	}

	for_each_node_state(nid, N_MEMORY)
		kcompactd_run(nid);
	register_sysctl_init("vm", vm_compaction);
	return 0;
}
subsys_initcall(kcompactd_init)

#endif /* CONFIG_COMPACTION */
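
/*
 * Usage sketch, not part of this file's build (kept under "#if 0"): the
 * knobs registered above can be driven from userspace.  A minimal,
 * stand-alone C example, assuming root privileges, the usual procfs/sysfs
 * mount points and a NUMA node 0 (the per-node "compact" attribute only
 * exists with CONFIG_SYSFS && CONFIG_NUMA).
 */
#if 0
#include <stdio.h>

/* Write a string to a procfs/sysfs file; returns 0 on success. */
static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF) {
		perror(path);
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/*
	 * Raise proactiveness to 40; per fragmentation_score_wmark() the
	 * thresholds become 60 (low) and 70 (high).
	 */
	write_str("/proc/sys/vm/compaction_proactiveness", "40");

	/* One-shot compaction of every node (sysctl_compaction_handler). */
	write_str("/proc/sys/vm/compact_memory", "1");

	/* Or compact a single node via the sysfs attribute (compact_store). */
	write_str("/sys/devices/system/node/node0/compact", "1");

	return 0;
}
#endif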