// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/cpu.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/sched/signal.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/page_owner.h>
#include <linux/psi.h>
#include "internal.h"

/*
 * Event accounting helpers used throughout this file. With
 * CONFIG_COMPACTION they forward to count_vm_event()/count_vm_events();
 * without it they expand to empty statements so callers need no #ifdefs.
 */
#ifdef CONFIG_COMPACTION
/* Account a single occurrence of @item. */
static inline void count_compact_event(enum vm_event_item item)
{
	count_vm_event(item);
}

/* Account @delta occurrences of @item in one call. */
static inline void count_compact_events(enum vm_event_item item, long delta)
{
	count_vm_events(item, delta);
}
#else
/* !CONFIG_COMPACTION: the argument is not evaluated at all */
#define count_compact_event(item) do { } while (0)
408c2ecf20Sopenharmony_ci#define count_compact_events(item, delta) do { } while (0) 418c2ecf20Sopenharmony_ci#endif 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci#if defined CONFIG_COMPACTION || defined CONFIG_CMA 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 468c2ecf20Sopenharmony_ci#include <trace/events/compaction.h> 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci#define block_start_pfn(pfn, order) round_down(pfn, 1UL << (order)) 498c2ecf20Sopenharmony_ci#define block_end_pfn(pfn, order) ALIGN((pfn) + 1, 1UL << (order)) 508c2ecf20Sopenharmony_ci#define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order) 518c2ecf20Sopenharmony_ci#define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order) 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* 548c2ecf20Sopenharmony_ci * Fragmentation score check interval for proactive compaction purposes. 558c2ecf20Sopenharmony_ci */ 568c2ecf20Sopenharmony_cistatic const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci/* 598c2ecf20Sopenharmony_ci * Page order with-respect-to which proactive compaction 608c2ecf20Sopenharmony_ci * calculates external fragmentation, which is used as 618c2ecf20Sopenharmony_ci * the "fragmentation score" of a node/zone. 
628c2ecf20Sopenharmony_ci */ 638c2ecf20Sopenharmony_ci#if defined CONFIG_TRANSPARENT_HUGEPAGE 648c2ecf20Sopenharmony_ci#define COMPACTION_HPAGE_ORDER HPAGE_PMD_ORDER 658c2ecf20Sopenharmony_ci#elif defined CONFIG_HUGETLBFS 668c2ecf20Sopenharmony_ci#define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER 678c2ecf20Sopenharmony_ci#else 688c2ecf20Sopenharmony_ci#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) 698c2ecf20Sopenharmony_ci#endif 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cistatic unsigned long release_freepages(struct list_head *freelist) 728c2ecf20Sopenharmony_ci{ 738c2ecf20Sopenharmony_ci struct page *page, *next; 748c2ecf20Sopenharmony_ci unsigned long high_pfn = 0; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci list_for_each_entry_safe(page, next, freelist, lru) { 778c2ecf20Sopenharmony_ci unsigned long pfn = page_to_pfn(page); 788c2ecf20Sopenharmony_ci list_del(&page->lru); 798c2ecf20Sopenharmony_ci __free_page(page); 808c2ecf20Sopenharmony_ci if (pfn > high_pfn) 818c2ecf20Sopenharmony_ci high_pfn = pfn; 828c2ecf20Sopenharmony_ci } 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci return high_pfn; 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistatic void split_map_pages(struct list_head *list) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci unsigned int i, order, nr_pages; 908c2ecf20Sopenharmony_ci struct page *page, *next; 918c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci list_for_each_entry_safe(page, next, list, lru) { 948c2ecf20Sopenharmony_ci list_del(&page->lru); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci order = page_private(page); 978c2ecf20Sopenharmony_ci nr_pages = 1 << order; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci post_alloc_hook(page, order, __GFP_MOVABLE); 1008c2ecf20Sopenharmony_ci if (order) 1018c2ecf20Sopenharmony_ci split_page(page, order); 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci for (i = 0; i < 
nr_pages; i++) { 1048c2ecf20Sopenharmony_ci list_add(&page->lru, &tmp_list); 1058c2ecf20Sopenharmony_ci page++; 1068c2ecf20Sopenharmony_ci } 1078c2ecf20Sopenharmony_ci } 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci list_splice(&tmp_list, list); 1108c2ecf20Sopenharmony_ci} 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPACTION 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ciint PageMovable(struct page *page) 1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci struct address_space *mapping; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(page), page); 1198c2ecf20Sopenharmony_ci if (!__PageMovable(page)) 1208c2ecf20Sopenharmony_ci return 0; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci mapping = page_mapping(page); 1238c2ecf20Sopenharmony_ci if (mapping && mapping->a_ops && mapping->a_ops->isolate_page) 1248c2ecf20Sopenharmony_ci return 1; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci return 0; 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(PageMovable); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_civoid __SetPageMovable(struct page *page, struct address_space *mapping) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(page), page); 1338c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page); 1348c2ecf20Sopenharmony_ci page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE); 1358c2ecf20Sopenharmony_ci} 1368c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__SetPageMovable); 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_civoid __ClearPageMovable(struct page *page) 1398c2ecf20Sopenharmony_ci{ 1408c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(page), page); 1418c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageMovable(page), page); 1428c2ecf20Sopenharmony_ci /* 1438c2ecf20Sopenharmony_ci * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE 1448c2ecf20Sopenharmony_ci * 
flag so that VM can catch up released page by driver after isolation. 1458c2ecf20Sopenharmony_ci * With it, VM migration doesn't try to put it back. 1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ci page->mapping = (void *)((unsigned long)page->mapping & 1488c2ecf20Sopenharmony_ci PAGE_MAPPING_MOVABLE); 1498c2ecf20Sopenharmony_ci} 1508c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__ClearPageMovable); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci/* Do not skip compaction more than 64 times */ 1538c2ecf20Sopenharmony_ci#define COMPACT_MAX_DEFER_SHIFT 6 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci/* 1568c2ecf20Sopenharmony_ci * Compaction is deferred when compaction fails to result in a page 1578c2ecf20Sopenharmony_ci * allocation success. 1 << compact_defer_shift, compactions are skipped up 1588c2ecf20Sopenharmony_ci * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT 1598c2ecf20Sopenharmony_ci */ 1608c2ecf20Sopenharmony_civoid defer_compaction(struct zone *zone, int order) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci zone->compact_considered = 0; 1638c2ecf20Sopenharmony_ci zone->compact_defer_shift++; 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci if (order < zone->compact_order_failed) 1668c2ecf20Sopenharmony_ci zone->compact_order_failed = order; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT) 1698c2ecf20Sopenharmony_ci zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci trace_mm_compaction_defer_compaction(zone, order); 1728c2ecf20Sopenharmony_ci} 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci/* Returns true if compaction should be skipped this time */ 1758c2ecf20Sopenharmony_cibool compaction_deferred(struct zone *zone, int order) 1768c2ecf20Sopenharmony_ci{ 1778c2ecf20Sopenharmony_ci unsigned long defer_limit = 1UL << zone->compact_defer_shift; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci if (order < 
zone->compact_order_failed) 1808c2ecf20Sopenharmony_ci return false; 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci /* Avoid possible overflow */ 1838c2ecf20Sopenharmony_ci if (++zone->compact_considered >= defer_limit) { 1848c2ecf20Sopenharmony_ci zone->compact_considered = defer_limit; 1858c2ecf20Sopenharmony_ci return false; 1868c2ecf20Sopenharmony_ci } 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci trace_mm_compaction_deferred(zone, order); 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci return true; 1918c2ecf20Sopenharmony_ci} 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci/* 1948c2ecf20Sopenharmony_ci * Update defer tracking counters after successful compaction of given order, 1958c2ecf20Sopenharmony_ci * which means an allocation either succeeded (alloc_success == true) or is 1968c2ecf20Sopenharmony_ci * expected to succeed. 1978c2ecf20Sopenharmony_ci */ 1988c2ecf20Sopenharmony_civoid compaction_defer_reset(struct zone *zone, int order, 1998c2ecf20Sopenharmony_ci bool alloc_success) 2008c2ecf20Sopenharmony_ci{ 2018c2ecf20Sopenharmony_ci if (alloc_success) { 2028c2ecf20Sopenharmony_ci zone->compact_considered = 0; 2038c2ecf20Sopenharmony_ci zone->compact_defer_shift = 0; 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci if (order >= zone->compact_order_failed) 2068c2ecf20Sopenharmony_ci zone->compact_order_failed = order + 1; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci trace_mm_compaction_defer_reset(zone, order); 2098c2ecf20Sopenharmony_ci} 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci/* Returns true if restarting compaction after many failures */ 2128c2ecf20Sopenharmony_cibool compaction_restarting(struct zone *zone, int order) 2138c2ecf20Sopenharmony_ci{ 2148c2ecf20Sopenharmony_ci if (order < zone->compact_order_failed) 2158c2ecf20Sopenharmony_ci return false; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT && 2188c2ecf20Sopenharmony_ci 
zone->compact_considered >= 1UL << zone->compact_defer_shift; 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci/* Returns true if the pageblock should be scanned for pages to isolate. */ 2228c2ecf20Sopenharmony_cistatic inline bool isolation_suitable(struct compact_control *cc, 2238c2ecf20Sopenharmony_ci struct page *page) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci if (cc->ignore_skip_hint) 2268c2ecf20Sopenharmony_ci return true; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci return !get_pageblock_skip(page); 2298c2ecf20Sopenharmony_ci} 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_cistatic void reset_cached_positions(struct zone *zone) 2328c2ecf20Sopenharmony_ci{ 2338c2ecf20Sopenharmony_ci zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; 2348c2ecf20Sopenharmony_ci zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; 2358c2ecf20Sopenharmony_ci zone->compact_cached_free_pfn = 2368c2ecf20Sopenharmony_ci pageblock_start_pfn(zone_end_pfn(zone) - 1); 2378c2ecf20Sopenharmony_ci} 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci/* 2408c2ecf20Sopenharmony_ci * Compound pages of >= pageblock_order should consistenly be skipped until 2418c2ecf20Sopenharmony_ci * released. It is always pointless to compact pages of such order (if they are 2428c2ecf20Sopenharmony_ci * migratable), and the pageblocks they occupy cannot contain any free pages. 
 */
static bool pageblock_skip_persistent(struct page *page)
{
	/* Only a compound page can qualify */
	if (!PageCompound(page))
		return false;

	/* The order is read from the head page */
	page = compound_head(page);

	if (compound_order(page) >= pageblock_order)
		return true;

	return false;
}

/*
 * __reset_isolation_pfn - decide whether the skip hint of the pageblock
 * containing @pfn should be cleared, and clear it if so.
 * @zone: zone being reset; a page that belongs to another zone is rejected
 * @pfn: a PFN inside the pageblock of interest
 * @check_source: sample the block for PageLRU pages
 * @check_target: sample the block for PageBuddy pages
 *
 * Returns true if this call cleared the skip bit, or if both @check_source
 * and @check_target are set and the bit was already clear. Returns false in
 * every other case, including when @pfn or the last PFN of the block is not
 * online.
 */
static bool
__reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
							bool check_target)
{
	struct page *page = pfn_to_online_page(pfn);
	struct page *block_page;
	struct page *end_page;
	unsigned long block_pfn;

	if (!page)
		return false;
	if (zone != page_zone(page))
		return false;
	if (pageblock_skip_persistent(page))
		return false;

	/*
	 * If skip is already cleared do no further checking once the
	 * restart points have been set.
	 */
	if (check_source && check_target && !get_pageblock_skip(page))
		return true;

	/*
	 * If clearing skip for the target scanner, do not select a
	 * non-movable pageblock as the starting point.
	 */
	if (!check_source && check_target &&
	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
		return false;

	/*
	 * Ensure the start of the pageblock or zone is online and valid.
	 * If it is, sampling begins there; otherwise it begins at @pfn.
	 */
	block_pfn = pageblock_start_pfn(pfn);
	block_pfn = max(block_pfn, zone->zone_start_pfn);
	block_page = pfn_to_online_page(block_pfn);
	if (block_page) {
		page = block_page;
		pfn = block_pfn;
	}

	/* Ensure the end of the pageblock or zone is online and valid */
	block_pfn = pageblock_end_pfn(pfn) - 1;
	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
	end_page = pfn_to_online_page(block_pfn);
	if (!end_page)
		return false;

	/*
	 * Only clear the hint if a sample indicates there is either a
	 * free page or an LRU page in the block. One or other condition
	 * is necessary for the block to be a migration source/target.
	 */
	do {
		if (pfn_valid_within(pfn)) {
			if (check_source && PageLRU(page)) {
				clear_pageblock_skip(page);
				return true;
			}

			if (check_target && PageBuddy(page)) {
				clear_pageblock_skip(page);
				return true;
			}
		}

		/*
		 * Sample one page every 1 << PAGE_ALLOC_COSTLY_ORDER pages;
		 * page and pfn are advanced together so they stay in step.
		 */
		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
	} while (page <= end_page);

	/* No sample matched: leave the skip hint as it is */
	return false;
}

/*
 * This function is called to clear all cached information on pageblocks that
 * should be skipped for page isolation when the migrate and free page scanner
 * meet.
 */
static void __reset_isolation_suitable(struct zone *zone)
{
	/* Two cursors walk towards each other from the zone boundaries */
	unsigned long migrate_pfn = zone->zone_start_pfn;
	unsigned long free_pfn = zone_end_pfn(zone) - 1;
	/*
	 * Best restart points found so far. They start at the opposite end
	 * of the zone, so the first block that is reset from either side
	 * passes the comparisons in the loop below.
	 */
	unsigned long reset_migrate = free_pfn;
	unsigned long reset_free = migrate_pfn;
	bool source_set = false;
	bool free_set = false;

	/* Nothing to do unless a flush has been requested */
	if (!zone->compact_blockskip_flush)
		return;

	zone->compact_blockskip_flush = false;

	/*
	 * Walk the zone and update pageblock skip information. Source looks
	 * for PageLRU while target looks for PageBuddy. When the scanner
	 * is found, both PageBuddy and PageLRU are checked as the pageblock
	 * is suitable as both source and target.
	 */
	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
					free_pfn -= pageblock_nr_pages) {
		cond_resched();

		/* Update the migrate PFN */
		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
		    migrate_pfn < reset_migrate) {
			source_set = true;
			reset_migrate = migrate_pfn;
			zone->compact_init_migrate_pfn = reset_migrate;
			zone->compact_cached_migrate_pfn[0] = reset_migrate;
			zone->compact_cached_migrate_pfn[1] = reset_migrate;
		}

		/* Update the free PFN */
		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
		    free_pfn > reset_free) {
			free_set = true;
			reset_free = free_pfn;
			zone->compact_init_free_pfn = reset_free;
			zone->compact_cached_free_pfn = reset_free;
		}
	}

	/* Leave no distance if no suitable block was reset */
	if (reset_migrate >= reset_free) {
		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
		zone->compact_cached_free_pfn = free_pfn;
	}
}

/*
 * Run __reset_isolation_suitable() on every populated zone of @pgdat that
 * has compact_blockskip_flush set.
 */
void reset_isolation_suitable(pg_data_t *pgdat)
{
	int zoneid;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct zone *zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		/* Only flush if a full compaction finished recently */
		if (zone->compact_blockskip_flush)
			__reset_isolation_suitable(zone);
	}
}

/*
 * Sets the pageblock skip bit if it was clear. Note that this is a hint as
 * locks are not required for read/writers. Returns true if it was already set.
 *
 * Returns false without reading or changing the bit when the skip hint is
 * being ignored or @pfn is not pageblock aligned. With cc->no_set_skip_hint
 * the bit is read and reported but never set.
 */
static bool test_and_set_skip(struct compact_control *cc, struct page *page,
							unsigned long pfn)
{
	bool skip;

	/* Do not update if skip hint is being ignored */
	if (cc->ignore_skip_hint)
		return false;

	/* Only the first PFN of a pageblock is acted upon */
	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
		return false;

	skip = get_pageblock_skip(page);
	if (!skip && !cc->no_set_skip_hint)
		set_pageblock_skip(page);

	return skip;
}

/*
 * Move the cached migration scanner positions of cc->zone forward to the
 * end of the pageblock containing @pfn. The positions only ever advance.
 * Index 0 is updated for every mode, index 1 only when cc->mode is not
 * MIGRATE_ASYNC.
 */
static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
	struct zone *zone = cc->zone;

	pfn = pageblock_end_pfn(pfn);

	/* Set for isolation rather than compaction */
	if (cc->no_set_skip_hint)
		return;

	if (pfn > zone->compact_cached_migrate_pfn[0])
		zone->compact_cached_migrate_pfn[0] = pfn;
	if (cc->mode != MIGRATE_ASYNC &&
	    pfn > zone->compact_cached_migrate_pfn[1])
		zone->compact_cached_migrate_pfn[1] = pfn;
}

/*
 * If no pages were isolated then mark this pageblock to be skipped in the
 * future. The information is later cleared by __reset_isolation_suitable().
 *
 * Does nothing when cc->no_set_skip_hint is set or @page is NULL. Otherwise
 * the skip bit is set for @page's pageblock and the zone's cached free
 * scanner position is lowered to @pfn if @pfn is below it.
 */
static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long pfn)
{
	struct zone *zone = cc->zone;

	if (cc->no_set_skip_hint)
		return;

	if (!page)
		return;

	set_pageblock_skip(page);

	/* Update where async and sync compaction should restart */
	if (pfn < zone->compact_cached_free_pfn)
		zone->compact_cached_free_pfn = pfn;
}
#else
/* !CONFIG_COMPACTION: there is no skip hint, every pageblock is suitable */
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	return true;
}

/*
 * The remaining definitions up to the #endif are the !CONFIG_COMPACTION
 * stand-ins for the skip-hint helpers: they keep no state and report
 * "nothing skipped".
 */
static inline bool pageblock_skip_persistent(struct page *page)
{
	return false;
}

/* Stub: no skip hint or cached position to update */
static inline void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long pfn)
{
}

/* Stub: no cached migrate position to update */
static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
}

/* Stub: reports that the skip bit was not already set */
static bool test_and_set_skip(struct compact_control *cc, struct page *page,
							unsigned long pfn)
{
	return false;
}
#endif /* CONFIG_COMPACTION */

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. For async compaction, trylock and record if the
 * lock is contended. The lock will still be acquired but compaction will
 * abort when the current block is finished regardless of success rate.
 * Sync compaction acquires the lock.
 *
 * Always returns true which makes it easier to track lock state in callers.
4988c2ecf20Sopenharmony_ci */ 4998c2ecf20Sopenharmony_cistatic bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags, 5008c2ecf20Sopenharmony_ci struct compact_control *cc) 5018c2ecf20Sopenharmony_ci __acquires(lock) 5028c2ecf20Sopenharmony_ci{ 5038c2ecf20Sopenharmony_ci /* Track if the lock is contended in async mode */ 5048c2ecf20Sopenharmony_ci if (cc->mode == MIGRATE_ASYNC && !cc->contended) { 5058c2ecf20Sopenharmony_ci if (spin_trylock_irqsave(lock, *flags)) 5068c2ecf20Sopenharmony_ci return true; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci cc->contended = true; 5098c2ecf20Sopenharmony_ci } 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci spin_lock_irqsave(lock, *flags); 5128c2ecf20Sopenharmony_ci return true; 5138c2ecf20Sopenharmony_ci} 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci/* 5168c2ecf20Sopenharmony_ci * Compaction requires the taking of some coarse locks that are potentially 5178c2ecf20Sopenharmony_ci * very heavily contended. The lock should be periodically unlocked to avoid 5188c2ecf20Sopenharmony_ci * having disabled IRQs for a long time, even when there is nobody waiting on 5198c2ecf20Sopenharmony_ci * the lock. It might also be that allowing the IRQs will result in 5208c2ecf20Sopenharmony_ci * need_resched() becoming true. If scheduling is needed, async compaction 5218c2ecf20Sopenharmony_ci * aborts. Sync compaction schedules. 5228c2ecf20Sopenharmony_ci * Either compaction type will also abort if a fatal signal is pending. 5238c2ecf20Sopenharmony_ci * In either case if the lock was locked, it is dropped and not regained. 
5248c2ecf20Sopenharmony_ci * 5258c2ecf20Sopenharmony_ci * Returns true if compaction should abort due to fatal signal pending, or 5268c2ecf20Sopenharmony_ci * async compaction due to need_resched() 5278c2ecf20Sopenharmony_ci * Returns false when compaction can continue (sync compaction might have 5288c2ecf20Sopenharmony_ci * scheduled) 5298c2ecf20Sopenharmony_ci */ 5308c2ecf20Sopenharmony_cistatic bool compact_unlock_should_abort(spinlock_t *lock, 5318c2ecf20Sopenharmony_ci unsigned long flags, bool *locked, struct compact_control *cc) 5328c2ecf20Sopenharmony_ci{ 5338c2ecf20Sopenharmony_ci if (*locked) { 5348c2ecf20Sopenharmony_ci spin_unlock_irqrestore(lock, flags); 5358c2ecf20Sopenharmony_ci *locked = false; 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci if (fatal_signal_pending(current)) { 5398c2ecf20Sopenharmony_ci cc->contended = true; 5408c2ecf20Sopenharmony_ci return true; 5418c2ecf20Sopenharmony_ci } 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ci cond_resched(); 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci return false; 5468c2ecf20Sopenharmony_ci} 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci/* 5498c2ecf20Sopenharmony_ci * Isolate free pages onto a private freelist. If @strict is true, will abort 5508c2ecf20Sopenharmony_ci * returning 0 on any invalid PFNs or non-free pages inside of the pageblock 5518c2ecf20Sopenharmony_ci * (even though it may still end up isolating some pages). 
5528c2ecf20Sopenharmony_ci */ 5538c2ecf20Sopenharmony_cistatic unsigned long isolate_freepages_block(struct compact_control *cc, 5548c2ecf20Sopenharmony_ci unsigned long *start_pfn, 5558c2ecf20Sopenharmony_ci unsigned long end_pfn, 5568c2ecf20Sopenharmony_ci struct list_head *freelist, 5578c2ecf20Sopenharmony_ci unsigned int stride, 5588c2ecf20Sopenharmony_ci bool strict) 5598c2ecf20Sopenharmony_ci{ 5608c2ecf20Sopenharmony_ci int nr_scanned = 0, total_isolated = 0; 5618c2ecf20Sopenharmony_ci struct page *cursor; 5628c2ecf20Sopenharmony_ci unsigned long flags = 0; 5638c2ecf20Sopenharmony_ci bool locked = false; 5648c2ecf20Sopenharmony_ci unsigned long blockpfn = *start_pfn; 5658c2ecf20Sopenharmony_ci unsigned int order; 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_ci /* Strict mode is for isolation, speed is secondary */ 5688c2ecf20Sopenharmony_ci if (strict) 5698c2ecf20Sopenharmony_ci stride = 1; 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci cursor = pfn_to_page(blockpfn); 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci /* Isolate free pages. */ 5748c2ecf20Sopenharmony_ci for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) { 5758c2ecf20Sopenharmony_ci int isolated; 5768c2ecf20Sopenharmony_ci struct page *page = cursor; 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci /* 5798c2ecf20Sopenharmony_ci * Periodically drop the lock (if held) regardless of its 5808c2ecf20Sopenharmony_ci * contention, to give chance to IRQs. 
Abort if fatal signal 5818c2ecf20Sopenharmony_ci * pending or async compaction detects need_resched() 5828c2ecf20Sopenharmony_ci */ 5838c2ecf20Sopenharmony_ci if (!(blockpfn % SWAP_CLUSTER_MAX) 5848c2ecf20Sopenharmony_ci && compact_unlock_should_abort(&cc->zone->lock, flags, 5858c2ecf20Sopenharmony_ci &locked, cc)) 5868c2ecf20Sopenharmony_ci break; 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci nr_scanned++; 5898c2ecf20Sopenharmony_ci if (!pfn_valid_within(blockpfn)) 5908c2ecf20Sopenharmony_ci goto isolate_fail; 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci /* 5938c2ecf20Sopenharmony_ci * For compound pages such as THP and hugetlbfs, we can save 5948c2ecf20Sopenharmony_ci * potentially a lot of iterations if we skip them at once. 5958c2ecf20Sopenharmony_ci * The check is racy, but we can consider only valid values 5968c2ecf20Sopenharmony_ci * and the only danger is skipping too much. 5978c2ecf20Sopenharmony_ci */ 5988c2ecf20Sopenharmony_ci if (PageCompound(page)) { 5998c2ecf20Sopenharmony_ci const unsigned int order = compound_order(page); 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci if (likely(order < MAX_ORDER)) { 6028c2ecf20Sopenharmony_ci blockpfn += (1UL << order) - 1; 6038c2ecf20Sopenharmony_ci cursor += (1UL << order) - 1; 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci goto isolate_fail; 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci if (!PageBuddy(page)) 6098c2ecf20Sopenharmony_ci goto isolate_fail; 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci /* 6128c2ecf20Sopenharmony_ci * If we already hold the lock, we can skip some rechecking. 6138c2ecf20Sopenharmony_ci * Note that if we hold the lock now, checked_pageblock was 6148c2ecf20Sopenharmony_ci * already set in some previous iteration (or strict is true), 6158c2ecf20Sopenharmony_ci * so it is correct to skip the suitable migration target 6168c2ecf20Sopenharmony_ci * recheck as well. 
6178c2ecf20Sopenharmony_ci */ 6188c2ecf20Sopenharmony_ci if (!locked) { 6198c2ecf20Sopenharmony_ci locked = compact_lock_irqsave(&cc->zone->lock, 6208c2ecf20Sopenharmony_ci &flags, cc); 6218c2ecf20Sopenharmony_ci 6228c2ecf20Sopenharmony_ci /* Recheck this is a buddy page under lock */ 6238c2ecf20Sopenharmony_ci if (!PageBuddy(page)) 6248c2ecf20Sopenharmony_ci goto isolate_fail; 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci /* Found a free page, will break it into order-0 pages */ 6288c2ecf20Sopenharmony_ci order = buddy_order(page); 6298c2ecf20Sopenharmony_ci isolated = __isolate_free_page(page, order); 6308c2ecf20Sopenharmony_ci if (!isolated) 6318c2ecf20Sopenharmony_ci break; 6328c2ecf20Sopenharmony_ci set_page_private(page, order); 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_ci total_isolated += isolated; 6358c2ecf20Sopenharmony_ci cc->nr_freepages += isolated; 6368c2ecf20Sopenharmony_ci list_add_tail(&page->lru, freelist); 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci if (!strict && cc->nr_migratepages <= cc->nr_freepages) { 6398c2ecf20Sopenharmony_ci blockpfn += isolated; 6408c2ecf20Sopenharmony_ci break; 6418c2ecf20Sopenharmony_ci } 6428c2ecf20Sopenharmony_ci /* Advance to the end of split page */ 6438c2ecf20Sopenharmony_ci blockpfn += isolated - 1; 6448c2ecf20Sopenharmony_ci cursor += isolated - 1; 6458c2ecf20Sopenharmony_ci continue; 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ciisolate_fail: 6488c2ecf20Sopenharmony_ci if (strict) 6498c2ecf20Sopenharmony_ci break; 6508c2ecf20Sopenharmony_ci else 6518c2ecf20Sopenharmony_ci continue; 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci } 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci if (locked) 6568c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cc->zone->lock, flags); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci /* 6598c2ecf20Sopenharmony_ci * There is a tiny chance that we have read bogus compound_order(), 6608c2ecf20Sopenharmony_ci 
* so be careful to not go outside of the pageblock. 6618c2ecf20Sopenharmony_ci */ 6628c2ecf20Sopenharmony_ci if (unlikely(blockpfn > end_pfn)) 6638c2ecf20Sopenharmony_ci blockpfn = end_pfn; 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn, 6668c2ecf20Sopenharmony_ci nr_scanned, total_isolated); 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci /* Record how far we have got within the block */ 6698c2ecf20Sopenharmony_ci *start_pfn = blockpfn; 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci /* 6728c2ecf20Sopenharmony_ci * If strict isolation is requested by CMA then check that all the 6738c2ecf20Sopenharmony_ci * pages requested were isolated. If there were any failures, 0 is 6748c2ecf20Sopenharmony_ci * returned and CMA will fail. 6758c2ecf20Sopenharmony_ci */ 6768c2ecf20Sopenharmony_ci if (strict && blockpfn < end_pfn) 6778c2ecf20Sopenharmony_ci total_isolated = 0; 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci cc->total_free_scanned += nr_scanned; 6808c2ecf20Sopenharmony_ci if (total_isolated) 6818c2ecf20Sopenharmony_ci count_compact_events(COMPACTISOLATED, total_isolated); 6828c2ecf20Sopenharmony_ci return total_isolated; 6838c2ecf20Sopenharmony_ci} 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci/** 6868c2ecf20Sopenharmony_ci * isolate_freepages_range() - isolate free pages. 6878c2ecf20Sopenharmony_ci * @cc: Compaction control structure. 6888c2ecf20Sopenharmony_ci * @start_pfn: The first PFN to start isolating. 6898c2ecf20Sopenharmony_ci * @end_pfn: The one-past-last PFN. 6908c2ecf20Sopenharmony_ci * 6918c2ecf20Sopenharmony_ci * Non-free pages, invalid PFNs, or zone boundaries within the 6928c2ecf20Sopenharmony_ci * [start_pfn, end_pfn) range are considered errors, cause function to 6938c2ecf20Sopenharmony_ci * undo its actions and return zero. 
6948c2ecf20Sopenharmony_ci * 6958c2ecf20Sopenharmony_ci * Otherwise, function returns one-past-the-last PFN of isolated page 6968c2ecf20Sopenharmony_ci * (which may be greater then end_pfn if end fell in a middle of 6978c2ecf20Sopenharmony_ci * a free page). 6988c2ecf20Sopenharmony_ci */ 6998c2ecf20Sopenharmony_ciunsigned long 7008c2ecf20Sopenharmony_ciisolate_freepages_range(struct compact_control *cc, 7018c2ecf20Sopenharmony_ci unsigned long start_pfn, unsigned long end_pfn) 7028c2ecf20Sopenharmony_ci{ 7038c2ecf20Sopenharmony_ci unsigned long isolated, pfn, block_start_pfn, block_end_pfn; 7048c2ecf20Sopenharmony_ci LIST_HEAD(freelist); 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci pfn = start_pfn; 7078c2ecf20Sopenharmony_ci block_start_pfn = pageblock_start_pfn(pfn); 7088c2ecf20Sopenharmony_ci if (block_start_pfn < cc->zone->zone_start_pfn) 7098c2ecf20Sopenharmony_ci block_start_pfn = cc->zone->zone_start_pfn; 7108c2ecf20Sopenharmony_ci block_end_pfn = pageblock_end_pfn(pfn); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci for (; pfn < end_pfn; pfn += isolated, 7138c2ecf20Sopenharmony_ci block_start_pfn = block_end_pfn, 7148c2ecf20Sopenharmony_ci block_end_pfn += pageblock_nr_pages) { 7158c2ecf20Sopenharmony_ci /* Protect pfn from changing by isolate_freepages_block */ 7168c2ecf20Sopenharmony_ci unsigned long isolate_start_pfn = pfn; 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci block_end_pfn = min(block_end_pfn, end_pfn); 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_ci /* 7218c2ecf20Sopenharmony_ci * pfn could pass the block_end_pfn if isolated freepage 7228c2ecf20Sopenharmony_ci * is more than pageblock order. In this case, we adjust 7238c2ecf20Sopenharmony_ci * scanning range to right one. 
7248c2ecf20Sopenharmony_ci */ 7258c2ecf20Sopenharmony_ci if (pfn >= block_end_pfn) { 7268c2ecf20Sopenharmony_ci block_start_pfn = pageblock_start_pfn(pfn); 7278c2ecf20Sopenharmony_ci block_end_pfn = pageblock_end_pfn(pfn); 7288c2ecf20Sopenharmony_ci block_end_pfn = min(block_end_pfn, end_pfn); 7298c2ecf20Sopenharmony_ci } 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci if (!pageblock_pfn_to_page(block_start_pfn, 7328c2ecf20Sopenharmony_ci block_end_pfn, cc->zone)) 7338c2ecf20Sopenharmony_ci break; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci isolated = isolate_freepages_block(cc, &isolate_start_pfn, 7368c2ecf20Sopenharmony_ci block_end_pfn, &freelist, 0, true); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci /* 7398c2ecf20Sopenharmony_ci * In strict mode, isolate_freepages_block() returns 0 if 7408c2ecf20Sopenharmony_ci * there are any holes in the block (ie. invalid PFNs or 7418c2ecf20Sopenharmony_ci * non-free pages). 7428c2ecf20Sopenharmony_ci */ 7438c2ecf20Sopenharmony_ci if (!isolated) 7448c2ecf20Sopenharmony_ci break; 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_ci /* 7478c2ecf20Sopenharmony_ci * If we managed to isolate pages, it is always (1 << n) * 7488c2ecf20Sopenharmony_ci * pageblock_nr_pages for some non-negative n. (Max order 7498c2ecf20Sopenharmony_ci * page may span two pageblocks). 7508c2ecf20Sopenharmony_ci */ 7518c2ecf20Sopenharmony_ci } 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci /* __isolate_free_page() does not map the pages */ 7548c2ecf20Sopenharmony_ci split_map_pages(&freelist); 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_ci if (pfn < end_pfn) { 7578c2ecf20Sopenharmony_ci /* Loop terminated early, cleanup. */ 7588c2ecf20Sopenharmony_ci release_freepages(&freelist); 7598c2ecf20Sopenharmony_ci return 0; 7608c2ecf20Sopenharmony_ci } 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci /* We don't use freelists for anything. 
*/ 7638c2ecf20Sopenharmony_ci return pfn; 7648c2ecf20Sopenharmony_ci} 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci/* Similar to reclaim, but different enough that they don't share logic */ 7678c2ecf20Sopenharmony_cistatic bool too_many_isolated(pg_data_t *pgdat) 7688c2ecf20Sopenharmony_ci{ 7698c2ecf20Sopenharmony_ci unsigned long active, inactive, isolated; 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci inactive = node_page_state(pgdat, NR_INACTIVE_FILE) + 7728c2ecf20Sopenharmony_ci node_page_state(pgdat, NR_INACTIVE_ANON); 7738c2ecf20Sopenharmony_ci active = node_page_state(pgdat, NR_ACTIVE_FILE) + 7748c2ecf20Sopenharmony_ci node_page_state(pgdat, NR_ACTIVE_ANON); 7758c2ecf20Sopenharmony_ci isolated = node_page_state(pgdat, NR_ISOLATED_FILE) + 7768c2ecf20Sopenharmony_ci node_page_state(pgdat, NR_ISOLATED_ANON); 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci return isolated > (inactive + active) / 2; 7798c2ecf20Sopenharmony_ci} 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci/** 7828c2ecf20Sopenharmony_ci * isolate_migratepages_block() - isolate all migrate-able pages within 7838c2ecf20Sopenharmony_ci * a single pageblock 7848c2ecf20Sopenharmony_ci * @cc: Compaction control structure. 7858c2ecf20Sopenharmony_ci * @low_pfn: The first PFN to isolate 7868c2ecf20Sopenharmony_ci * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock 7878c2ecf20Sopenharmony_ci * @isolate_mode: Isolation mode to be used. 7888c2ecf20Sopenharmony_ci * 7898c2ecf20Sopenharmony_ci * Isolate all pages that can be migrated from the range specified by 7908c2ecf20Sopenharmony_ci * [low_pfn, end_pfn). The range is expected to be within same pageblock. 7918c2ecf20Sopenharmony_ci * Returns zero if there is a fatal signal pending, otherwise PFN of the 7928c2ecf20Sopenharmony_ci * first page that was not scanned (which may be both less, equal to or more 7938c2ecf20Sopenharmony_ci * than end_pfn). 
7948c2ecf20Sopenharmony_ci * 7958c2ecf20Sopenharmony_ci * The pages are isolated on cc->migratepages list (not required to be empty), 7968c2ecf20Sopenharmony_ci * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field 7978c2ecf20Sopenharmony_ci * is neither read nor updated. 7988c2ecf20Sopenharmony_ci */ 7998c2ecf20Sopenharmony_cistatic unsigned long 8008c2ecf20Sopenharmony_ciisolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, 8018c2ecf20Sopenharmony_ci unsigned long end_pfn, isolate_mode_t isolate_mode) 8028c2ecf20Sopenharmony_ci{ 8038c2ecf20Sopenharmony_ci pg_data_t *pgdat = cc->zone->zone_pgdat; 8048c2ecf20Sopenharmony_ci unsigned long nr_scanned = 0, nr_isolated = 0; 8058c2ecf20Sopenharmony_ci struct lruvec *lruvec; 8068c2ecf20Sopenharmony_ci unsigned long flags = 0; 8078c2ecf20Sopenharmony_ci bool locked = false; 8088c2ecf20Sopenharmony_ci struct page *page = NULL, *valid_page = NULL; 8098c2ecf20Sopenharmony_ci unsigned long start_pfn = low_pfn; 8108c2ecf20Sopenharmony_ci bool skip_on_failure = false; 8118c2ecf20Sopenharmony_ci unsigned long next_skip_pfn = 0; 8128c2ecf20Sopenharmony_ci bool skip_updated = false; 8138c2ecf20Sopenharmony_ci 8148c2ecf20Sopenharmony_ci /* 8158c2ecf20Sopenharmony_ci * Ensure that there are not too many pages isolated from the LRU 8168c2ecf20Sopenharmony_ci * list by either parallel reclaimers or compaction. 
If there are, 8178c2ecf20Sopenharmony_ci * delay for some time until fewer pages are isolated 8188c2ecf20Sopenharmony_ci */ 8198c2ecf20Sopenharmony_ci while (unlikely(too_many_isolated(pgdat))) { 8208c2ecf20Sopenharmony_ci /* stop isolation if there are still pages not migrated */ 8218c2ecf20Sopenharmony_ci if (cc->nr_migratepages) 8228c2ecf20Sopenharmony_ci return 0; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci /* async migration should just abort */ 8258c2ecf20Sopenharmony_ci if (cc->mode == MIGRATE_ASYNC) 8268c2ecf20Sopenharmony_ci return 0; 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci congestion_wait(BLK_RW_ASYNC, HZ/10); 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_ci if (fatal_signal_pending(current)) 8318c2ecf20Sopenharmony_ci return 0; 8328c2ecf20Sopenharmony_ci } 8338c2ecf20Sopenharmony_ci 8348c2ecf20Sopenharmony_ci cond_resched(); 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) { 8378c2ecf20Sopenharmony_ci skip_on_failure = true; 8388c2ecf20Sopenharmony_ci next_skip_pfn = block_end_pfn(low_pfn, cc->order); 8398c2ecf20Sopenharmony_ci } 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci /* Time to isolate some pages for migration */ 8428c2ecf20Sopenharmony_ci for (; low_pfn < end_pfn; low_pfn++) { 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci if (skip_on_failure && low_pfn >= next_skip_pfn) { 8458c2ecf20Sopenharmony_ci /* 8468c2ecf20Sopenharmony_ci * We have isolated all migration candidates in the 8478c2ecf20Sopenharmony_ci * previous order-aligned block, and did not skip it due 8488c2ecf20Sopenharmony_ci * to failure. We should migrate the pages now and 8498c2ecf20Sopenharmony_ci * hopefully succeed compaction. 
8508c2ecf20Sopenharmony_ci */ 8518c2ecf20Sopenharmony_ci if (nr_isolated) 8528c2ecf20Sopenharmony_ci break; 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci /* 8558c2ecf20Sopenharmony_ci * We failed to isolate in the previous order-aligned 8568c2ecf20Sopenharmony_ci * block. Set the new boundary to the end of the 8578c2ecf20Sopenharmony_ci * current block. Note we can't simply increase 8588c2ecf20Sopenharmony_ci * next_skip_pfn by 1 << order, as low_pfn might have 8598c2ecf20Sopenharmony_ci * been incremented by a higher number due to skipping 8608c2ecf20Sopenharmony_ci * a compound or a high-order buddy page in the 8618c2ecf20Sopenharmony_ci * previous loop iteration. 8628c2ecf20Sopenharmony_ci */ 8638c2ecf20Sopenharmony_ci next_skip_pfn = block_end_pfn(low_pfn, cc->order); 8648c2ecf20Sopenharmony_ci } 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci /* 8678c2ecf20Sopenharmony_ci * Periodically drop the lock (if held) regardless of its 8688c2ecf20Sopenharmony_ci * contention, to give chance to IRQs. Abort completely if 8698c2ecf20Sopenharmony_ci * a fatal signal is pending. 8708c2ecf20Sopenharmony_ci */ 8718c2ecf20Sopenharmony_ci if (!(low_pfn % SWAP_CLUSTER_MAX) 8728c2ecf20Sopenharmony_ci && compact_unlock_should_abort(&pgdat->lru_lock, 8738c2ecf20Sopenharmony_ci flags, &locked, cc)) { 8748c2ecf20Sopenharmony_ci low_pfn = 0; 8758c2ecf20Sopenharmony_ci goto fatal_pending; 8768c2ecf20Sopenharmony_ci } 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci if (!pfn_valid_within(low_pfn)) 8798c2ecf20Sopenharmony_ci goto isolate_fail; 8808c2ecf20Sopenharmony_ci nr_scanned++; 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_ci page = pfn_to_page(low_pfn); 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci /* 8858c2ecf20Sopenharmony_ci * Check if the pageblock has already been marked skipped. 
8868c2ecf20Sopenharmony_ci * Only the aligned PFN is checked as the caller isolates 8878c2ecf20Sopenharmony_ci * COMPACT_CLUSTER_MAX at a time so the second call must 8888c2ecf20Sopenharmony_ci * not falsely conclude that the block should be skipped. 8898c2ecf20Sopenharmony_ci */ 8908c2ecf20Sopenharmony_ci if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) { 8918c2ecf20Sopenharmony_ci if (!cc->ignore_skip_hint && get_pageblock_skip(page)) { 8928c2ecf20Sopenharmony_ci low_pfn = end_pfn; 8938c2ecf20Sopenharmony_ci goto isolate_abort; 8948c2ecf20Sopenharmony_ci } 8958c2ecf20Sopenharmony_ci valid_page = page; 8968c2ecf20Sopenharmony_ci } 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci /* 8998c2ecf20Sopenharmony_ci * Skip if free. We read page order here without zone lock 9008c2ecf20Sopenharmony_ci * which is generally unsafe, but the race window is small and 9018c2ecf20Sopenharmony_ci * the worst thing that can happen is that we skip some 9028c2ecf20Sopenharmony_ci * potential isolation targets. 9038c2ecf20Sopenharmony_ci */ 9048c2ecf20Sopenharmony_ci if (PageBuddy(page)) { 9058c2ecf20Sopenharmony_ci unsigned long freepage_order = buddy_order_unsafe(page); 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci /* 9088c2ecf20Sopenharmony_ci * Without lock, we cannot be sure that what we got is 9098c2ecf20Sopenharmony_ci * a valid page order. Consider only values in the 9108c2ecf20Sopenharmony_ci * valid order range to prevent low_pfn overflow. 
9118c2ecf20Sopenharmony_ci */ 9128c2ecf20Sopenharmony_ci if (freepage_order > 0 && freepage_order < MAX_ORDER) 9138c2ecf20Sopenharmony_ci low_pfn += (1UL << freepage_order) - 1; 9148c2ecf20Sopenharmony_ci continue; 9158c2ecf20Sopenharmony_ci } 9168c2ecf20Sopenharmony_ci 9178c2ecf20Sopenharmony_ci /* 9188c2ecf20Sopenharmony_ci * Regardless of being on LRU, compound pages such as THP and 9198c2ecf20Sopenharmony_ci * hugetlbfs are not to be compacted unless we are attempting 9208c2ecf20Sopenharmony_ci * an allocation much larger than the huge page size (eg CMA). 9218c2ecf20Sopenharmony_ci * We can potentially save a lot of iterations if we skip them 9228c2ecf20Sopenharmony_ci * at once. The check is racy, but we can consider only valid 9238c2ecf20Sopenharmony_ci * values and the only danger is skipping too much. 9248c2ecf20Sopenharmony_ci */ 9258c2ecf20Sopenharmony_ci if (PageCompound(page) && !cc->alloc_contig) { 9268c2ecf20Sopenharmony_ci const unsigned int order = compound_order(page); 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci if (likely(order < MAX_ORDER)) 9298c2ecf20Sopenharmony_ci low_pfn += (1UL << order) - 1; 9308c2ecf20Sopenharmony_ci goto isolate_fail; 9318c2ecf20Sopenharmony_ci } 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci /* 9348c2ecf20Sopenharmony_ci * Check may be lockless but that's ok as we recheck later. 9358c2ecf20Sopenharmony_ci * It's possible to migrate LRU and non-lru movable pages. 9368c2ecf20Sopenharmony_ci * Skip any other type of page 9378c2ecf20Sopenharmony_ci */ 9388c2ecf20Sopenharmony_ci if (!PageLRU(page)) { 9398c2ecf20Sopenharmony_ci /* 9408c2ecf20Sopenharmony_ci * __PageMovable can return false positive so we need 9418c2ecf20Sopenharmony_ci * to verify it under page_lock. 
9428c2ecf20Sopenharmony_ci */ 9438c2ecf20Sopenharmony_ci if (unlikely(__PageMovable(page)) && 9448c2ecf20Sopenharmony_ci !PageIsolated(page)) { 9458c2ecf20Sopenharmony_ci if (locked) { 9468c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, 9478c2ecf20Sopenharmony_ci flags); 9488c2ecf20Sopenharmony_ci locked = false; 9498c2ecf20Sopenharmony_ci } 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci if (!isolate_movable_page(page, isolate_mode)) 9528c2ecf20Sopenharmony_ci goto isolate_success; 9538c2ecf20Sopenharmony_ci } 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ci goto isolate_fail; 9568c2ecf20Sopenharmony_ci } 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci /* 9598c2ecf20Sopenharmony_ci * Migration will fail if an anonymous page is pinned in memory, 9608c2ecf20Sopenharmony_ci * so avoid taking lru_lock and isolating it unnecessarily in an 9618c2ecf20Sopenharmony_ci * admittedly racy check. 9628c2ecf20Sopenharmony_ci */ 9638c2ecf20Sopenharmony_ci if (!page_mapping(page) && 9648c2ecf20Sopenharmony_ci page_count(page) > page_mapcount(page)) 9658c2ecf20Sopenharmony_ci goto isolate_fail; 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_ci /* 9688c2ecf20Sopenharmony_ci * Only allow to migrate anonymous pages in GFP_NOFS context 9698c2ecf20Sopenharmony_ci * because those do not depend on fs locks. 
9708c2ecf20Sopenharmony_ci */ 9718c2ecf20Sopenharmony_ci if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page)) 9728c2ecf20Sopenharmony_ci goto isolate_fail; 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci /* If we already hold the lock, we can skip some rechecking */ 9758c2ecf20Sopenharmony_ci if (!locked) { 9768c2ecf20Sopenharmony_ci locked = compact_lock_irqsave(&pgdat->lru_lock, 9778c2ecf20Sopenharmony_ci &flags, cc); 9788c2ecf20Sopenharmony_ci 9798c2ecf20Sopenharmony_ci /* Try get exclusive access under lock */ 9808c2ecf20Sopenharmony_ci if (!skip_updated) { 9818c2ecf20Sopenharmony_ci skip_updated = true; 9828c2ecf20Sopenharmony_ci if (test_and_set_skip(cc, page, low_pfn)) 9838c2ecf20Sopenharmony_ci goto isolate_abort; 9848c2ecf20Sopenharmony_ci } 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci /* Recheck PageLRU and PageCompound under lock */ 9878c2ecf20Sopenharmony_ci if (!PageLRU(page)) 9888c2ecf20Sopenharmony_ci goto isolate_fail; 9898c2ecf20Sopenharmony_ci 9908c2ecf20Sopenharmony_ci /* 9918c2ecf20Sopenharmony_ci * Page become compound since the non-locked check, 9928c2ecf20Sopenharmony_ci * and it's on LRU. It can only be a THP so the order 9938c2ecf20Sopenharmony_ci * is safe to read and it's 0 for tail pages. 9948c2ecf20Sopenharmony_ci */ 9958c2ecf20Sopenharmony_ci if (unlikely(PageCompound(page) && !cc->alloc_contig)) { 9968c2ecf20Sopenharmony_ci low_pfn += compound_nr(page) - 1; 9978c2ecf20Sopenharmony_ci goto isolate_fail; 9988c2ecf20Sopenharmony_ci } 9998c2ecf20Sopenharmony_ci } 10008c2ecf20Sopenharmony_ci 10018c2ecf20Sopenharmony_ci lruvec = mem_cgroup_page_lruvec(page, pgdat); 10028c2ecf20Sopenharmony_ci 10038c2ecf20Sopenharmony_ci /* Try isolate the page */ 10048c2ecf20Sopenharmony_ci if (__isolate_lru_page(page, isolate_mode) != 0) 10058c2ecf20Sopenharmony_ci goto isolate_fail; 10068c2ecf20Sopenharmony_ci 10078c2ecf20Sopenharmony_ci /* The whole page is taken off the LRU; skip the tail pages. 
*/ 10088c2ecf20Sopenharmony_ci if (PageCompound(page)) 10098c2ecf20Sopenharmony_ci low_pfn += compound_nr(page) - 1; 10108c2ecf20Sopenharmony_ci 10118c2ecf20Sopenharmony_ci /* Successfully isolated */ 10128c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, page_lru(page)); 10138c2ecf20Sopenharmony_ci mod_node_page_state(page_pgdat(page), 10148c2ecf20Sopenharmony_ci NR_ISOLATED_ANON + page_is_file_lru(page), 10158c2ecf20Sopenharmony_ci thp_nr_pages(page)); 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ciisolate_success: 10188c2ecf20Sopenharmony_ci list_add(&page->lru, &cc->migratepages); 10198c2ecf20Sopenharmony_ci cc->nr_migratepages += compound_nr(page); 10208c2ecf20Sopenharmony_ci nr_isolated += compound_nr(page); 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci /* 10238c2ecf20Sopenharmony_ci * Avoid isolating too much unless this block is being 10248c2ecf20Sopenharmony_ci * rescanned (e.g. dirty/writeback pages, parallel allocation) 10258c2ecf20Sopenharmony_ci * or a lock is contended. For contention, isolate quickly to 10268c2ecf20Sopenharmony_ci * potentially remove one source of contention. 10278c2ecf20Sopenharmony_ci */ 10288c2ecf20Sopenharmony_ci if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX && 10298c2ecf20Sopenharmony_ci !cc->rescan && !cc->contended) { 10308c2ecf20Sopenharmony_ci ++low_pfn; 10318c2ecf20Sopenharmony_ci break; 10328c2ecf20Sopenharmony_ci } 10338c2ecf20Sopenharmony_ci 10348c2ecf20Sopenharmony_ci continue; 10358c2ecf20Sopenharmony_ciisolate_fail: 10368c2ecf20Sopenharmony_ci if (!skip_on_failure) 10378c2ecf20Sopenharmony_ci continue; 10388c2ecf20Sopenharmony_ci 10398c2ecf20Sopenharmony_ci /* 10408c2ecf20Sopenharmony_ci * We have isolated some pages, but then failed. Release them 10418c2ecf20Sopenharmony_ci * instead of migrating, as we cannot form the cc->order buddy 10428c2ecf20Sopenharmony_ci * page anyway. 
10438c2ecf20Sopenharmony_ci */ 10448c2ecf20Sopenharmony_ci if (nr_isolated) { 10458c2ecf20Sopenharmony_ci if (locked) { 10468c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, flags); 10478c2ecf20Sopenharmony_ci locked = false; 10488c2ecf20Sopenharmony_ci } 10498c2ecf20Sopenharmony_ci putback_movable_pages(&cc->migratepages); 10508c2ecf20Sopenharmony_ci cc->nr_migratepages = 0; 10518c2ecf20Sopenharmony_ci nr_isolated = 0; 10528c2ecf20Sopenharmony_ci } 10538c2ecf20Sopenharmony_ci 10548c2ecf20Sopenharmony_ci if (low_pfn < next_skip_pfn) { 10558c2ecf20Sopenharmony_ci low_pfn = next_skip_pfn - 1; 10568c2ecf20Sopenharmony_ci /* 10578c2ecf20Sopenharmony_ci * The check near the loop beginning would have updated 10588c2ecf20Sopenharmony_ci * next_skip_pfn too, but this is a bit simpler. 10598c2ecf20Sopenharmony_ci */ 10608c2ecf20Sopenharmony_ci next_skip_pfn += 1UL << cc->order; 10618c2ecf20Sopenharmony_ci } 10628c2ecf20Sopenharmony_ci } 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci /* 10658c2ecf20Sopenharmony_ci * The PageBuddy() check could have potentially brought us outside 10668c2ecf20Sopenharmony_ci * the range to be scanned. 10678c2ecf20Sopenharmony_ci */ 10688c2ecf20Sopenharmony_ci if (unlikely(low_pfn > end_pfn)) 10698c2ecf20Sopenharmony_ci low_pfn = end_pfn; 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ciisolate_abort: 10728c2ecf20Sopenharmony_ci if (locked) 10738c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, flags); 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci /* 10768c2ecf20Sopenharmony_ci * Updated the cached scanner pfn once the pageblock has been scanned 10778c2ecf20Sopenharmony_ci * Pages will either be migrated in which case there is no point 10788c2ecf20Sopenharmony_ci * scanning in the near future or migration failed in which case the 10798c2ecf20Sopenharmony_ci * failure reason may persist. 
The block is marked for skipping if 10808c2ecf20Sopenharmony_ci * there were no pages isolated in the block or if the block is 10818c2ecf20Sopenharmony_ci * rescanned twice in a row. 10828c2ecf20Sopenharmony_ci */ 10838c2ecf20Sopenharmony_ci if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) { 10848c2ecf20Sopenharmony_ci if (valid_page && !skip_updated) 10858c2ecf20Sopenharmony_ci set_pageblock_skip(valid_page); 10868c2ecf20Sopenharmony_ci update_cached_migrate(cc, low_pfn); 10878c2ecf20Sopenharmony_ci } 10888c2ecf20Sopenharmony_ci 10898c2ecf20Sopenharmony_ci trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, 10908c2ecf20Sopenharmony_ci nr_scanned, nr_isolated); 10918c2ecf20Sopenharmony_ci 10928c2ecf20Sopenharmony_cifatal_pending: 10938c2ecf20Sopenharmony_ci cc->total_migrate_scanned += nr_scanned; 10948c2ecf20Sopenharmony_ci if (nr_isolated) 10958c2ecf20Sopenharmony_ci count_compact_events(COMPACTISOLATED, nr_isolated); 10968c2ecf20Sopenharmony_ci 10978c2ecf20Sopenharmony_ci return low_pfn; 10988c2ecf20Sopenharmony_ci} 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci/** 11018c2ecf20Sopenharmony_ci * isolate_migratepages_range() - isolate migrate-able pages in a PFN range 11028c2ecf20Sopenharmony_ci * @cc: Compaction control structure. 11038c2ecf20Sopenharmony_ci * @start_pfn: The first PFN to start isolating. 11048c2ecf20Sopenharmony_ci * @end_pfn: The one-past-last PFN. 11058c2ecf20Sopenharmony_ci * 11068c2ecf20Sopenharmony_ci * Returns zero if isolation fails fatally due to e.g. pending signal. 11078c2ecf20Sopenharmony_ci * Otherwise, function returns one-past-the-last PFN of isolated page 11088c2ecf20Sopenharmony_ci * (which may be greater than end_pfn if end fell in a middle of a THP page). 
11098c2ecf20Sopenharmony_ci */ 11108c2ecf20Sopenharmony_ciunsigned long 11118c2ecf20Sopenharmony_ciisolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, 11128c2ecf20Sopenharmony_ci unsigned long end_pfn) 11138c2ecf20Sopenharmony_ci{ 11148c2ecf20Sopenharmony_ci unsigned long pfn, block_start_pfn, block_end_pfn; 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_ci /* Scan block by block. First and last block may be incomplete */ 11178c2ecf20Sopenharmony_ci pfn = start_pfn; 11188c2ecf20Sopenharmony_ci block_start_pfn = pageblock_start_pfn(pfn); 11198c2ecf20Sopenharmony_ci if (block_start_pfn < cc->zone->zone_start_pfn) 11208c2ecf20Sopenharmony_ci block_start_pfn = cc->zone->zone_start_pfn; 11218c2ecf20Sopenharmony_ci block_end_pfn = pageblock_end_pfn(pfn); 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ci for (; pfn < end_pfn; pfn = block_end_pfn, 11248c2ecf20Sopenharmony_ci block_start_pfn = block_end_pfn, 11258c2ecf20Sopenharmony_ci block_end_pfn += pageblock_nr_pages) { 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_ci block_end_pfn = min(block_end_pfn, end_pfn); 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci if (!pageblock_pfn_to_page(block_start_pfn, 11308c2ecf20Sopenharmony_ci block_end_pfn, cc->zone)) 11318c2ecf20Sopenharmony_ci continue; 11328c2ecf20Sopenharmony_ci 11338c2ecf20Sopenharmony_ci pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, 11348c2ecf20Sopenharmony_ci ISOLATE_UNEVICTABLE); 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci if (!pfn) 11378c2ecf20Sopenharmony_ci break; 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX) 11408c2ecf20Sopenharmony_ci break; 11418c2ecf20Sopenharmony_ci } 11428c2ecf20Sopenharmony_ci 11438c2ecf20Sopenharmony_ci return pfn; 11448c2ecf20Sopenharmony_ci} 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 11478c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPACTION 
11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_cistatic bool suitable_migration_source(struct compact_control *cc, 11508c2ecf20Sopenharmony_ci struct page *page) 11518c2ecf20Sopenharmony_ci{ 11528c2ecf20Sopenharmony_ci int block_mt; 11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_ci if (pageblock_skip_persistent(page)) 11558c2ecf20Sopenharmony_ci return false; 11568c2ecf20Sopenharmony_ci 11578c2ecf20Sopenharmony_ci if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction) 11588c2ecf20Sopenharmony_ci return true; 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci block_mt = get_pageblock_migratetype(page); 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci if (cc->migratetype == MIGRATE_MOVABLE) 11638c2ecf20Sopenharmony_ci return is_migrate_movable(block_mt); 11648c2ecf20Sopenharmony_ci else 11658c2ecf20Sopenharmony_ci return block_mt == cc->migratetype; 11668c2ecf20Sopenharmony_ci} 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci/* Returns true if the page is within a block suitable for migration to */ 11698c2ecf20Sopenharmony_cistatic bool suitable_migration_target(struct compact_control *cc, 11708c2ecf20Sopenharmony_ci struct page *page) 11718c2ecf20Sopenharmony_ci{ 11728c2ecf20Sopenharmony_ci /* If the page is a large free page, then disallow migration */ 11738c2ecf20Sopenharmony_ci if (PageBuddy(page)) { 11748c2ecf20Sopenharmony_ci /* 11758c2ecf20Sopenharmony_ci * We are checking page_order without zone->lock taken. But 11768c2ecf20Sopenharmony_ci * the only small danger is that we skip a potentially suitable 11778c2ecf20Sopenharmony_ci * pageblock, so it's not worth to check order for valid range. 
11788c2ecf20Sopenharmony_ci */ 11798c2ecf20Sopenharmony_ci if (buddy_order_unsafe(page) >= pageblock_order) 11808c2ecf20Sopenharmony_ci return false; 11818c2ecf20Sopenharmony_ci } 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci if (cc->ignore_block_suitable) 11848c2ecf20Sopenharmony_ci return true; 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 11878c2ecf20Sopenharmony_ci if (is_migrate_movable(get_pageblock_migratetype(page))) 11888c2ecf20Sopenharmony_ci return true; 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci /* Otherwise skip the block */ 11918c2ecf20Sopenharmony_ci return false; 11928c2ecf20Sopenharmony_ci} 11938c2ecf20Sopenharmony_ci 11948c2ecf20Sopenharmony_cistatic inline unsigned int 11958c2ecf20Sopenharmony_cifreelist_scan_limit(struct compact_control *cc) 11968c2ecf20Sopenharmony_ci{ 11978c2ecf20Sopenharmony_ci unsigned short shift = BITS_PER_LONG - 1; 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1; 12008c2ecf20Sopenharmony_ci} 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci/* 12038c2ecf20Sopenharmony_ci * Test whether the free scanner has reached the same or lower pageblock than 12048c2ecf20Sopenharmony_ci * the migration scanner, and compaction should thus terminate. 12058c2ecf20Sopenharmony_ci */ 12068c2ecf20Sopenharmony_cistatic inline bool compact_scanners_met(struct compact_control *cc) 12078c2ecf20Sopenharmony_ci{ 12088c2ecf20Sopenharmony_ci return (cc->free_pfn >> pageblock_order) 12098c2ecf20Sopenharmony_ci <= (cc->migrate_pfn >> pageblock_order); 12108c2ecf20Sopenharmony_ci} 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_ci/* 12138c2ecf20Sopenharmony_ci * Used when scanning for a suitable migration target which scans freelists 12148c2ecf20Sopenharmony_ci * in reverse. 
Reorders the list such as the unscanned pages are scanned 12158c2ecf20Sopenharmony_ci * first on the next iteration of the free scanner 12168c2ecf20Sopenharmony_ci */ 12178c2ecf20Sopenharmony_cistatic void 12188c2ecf20Sopenharmony_cimove_freelist_head(struct list_head *freelist, struct page *freepage) 12198c2ecf20Sopenharmony_ci{ 12208c2ecf20Sopenharmony_ci LIST_HEAD(sublist); 12218c2ecf20Sopenharmony_ci 12228c2ecf20Sopenharmony_ci if (!list_is_last(freelist, &freepage->lru)) { 12238c2ecf20Sopenharmony_ci list_cut_before(&sublist, freelist, &freepage->lru); 12248c2ecf20Sopenharmony_ci if (!list_empty(&sublist)) 12258c2ecf20Sopenharmony_ci list_splice_tail(&sublist, freelist); 12268c2ecf20Sopenharmony_ci } 12278c2ecf20Sopenharmony_ci} 12288c2ecf20Sopenharmony_ci 12298c2ecf20Sopenharmony_ci/* 12308c2ecf20Sopenharmony_ci * Similar to move_freelist_head except used by the migration scanner 12318c2ecf20Sopenharmony_ci * when scanning forward. It's possible for these list operations to 12328c2ecf20Sopenharmony_ci * move against each other if they search the free list exactly in 12338c2ecf20Sopenharmony_ci * lockstep. 
12348c2ecf20Sopenharmony_ci */ 12358c2ecf20Sopenharmony_cistatic void 12368c2ecf20Sopenharmony_cimove_freelist_tail(struct list_head *freelist, struct page *freepage) 12378c2ecf20Sopenharmony_ci{ 12388c2ecf20Sopenharmony_ci LIST_HEAD(sublist); 12398c2ecf20Sopenharmony_ci 12408c2ecf20Sopenharmony_ci if (!list_is_first(freelist, &freepage->lru)) { 12418c2ecf20Sopenharmony_ci list_cut_position(&sublist, freelist, &freepage->lru); 12428c2ecf20Sopenharmony_ci if (!list_empty(&sublist)) 12438c2ecf20Sopenharmony_ci list_splice_tail(&sublist, freelist); 12448c2ecf20Sopenharmony_ci } 12458c2ecf20Sopenharmony_ci} 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_cistatic void 12488c2ecf20Sopenharmony_cifast_isolate_around(struct compact_control *cc, unsigned long pfn) 12498c2ecf20Sopenharmony_ci{ 12508c2ecf20Sopenharmony_ci unsigned long start_pfn, end_pfn; 12518c2ecf20Sopenharmony_ci struct page *page; 12528c2ecf20Sopenharmony_ci 12538c2ecf20Sopenharmony_ci /* Do not search around if there are enough pages already */ 12548c2ecf20Sopenharmony_ci if (cc->nr_freepages >= cc->nr_migratepages) 12558c2ecf20Sopenharmony_ci return; 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_ci /* Minimise scanning during async compaction */ 12588c2ecf20Sopenharmony_ci if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC) 12598c2ecf20Sopenharmony_ci return; 12608c2ecf20Sopenharmony_ci 12618c2ecf20Sopenharmony_ci /* Pageblock boundaries */ 12628c2ecf20Sopenharmony_ci start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn); 12638c2ecf20Sopenharmony_ci end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)); 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ci page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone); 12668c2ecf20Sopenharmony_ci if (!page) 12678c2ecf20Sopenharmony_ci return; 12688c2ecf20Sopenharmony_ci 12698c2ecf20Sopenharmony_ci isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); 12708c2ecf20Sopenharmony_ci 
12718c2ecf20Sopenharmony_ci /* Skip this pageblock in the future as it's full or nearly full */ 12728c2ecf20Sopenharmony_ci if (cc->nr_freepages < cc->nr_migratepages) 12738c2ecf20Sopenharmony_ci set_pageblock_skip(page); 12748c2ecf20Sopenharmony_ci 12758c2ecf20Sopenharmony_ci return; 12768c2ecf20Sopenharmony_ci} 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_ci/* Search orders in round-robin fashion */ 12798c2ecf20Sopenharmony_cistatic int next_search_order(struct compact_control *cc, int order) 12808c2ecf20Sopenharmony_ci{ 12818c2ecf20Sopenharmony_ci order--; 12828c2ecf20Sopenharmony_ci if (order < 0) 12838c2ecf20Sopenharmony_ci order = cc->order - 1; 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_ci /* Search wrapped around? */ 12868c2ecf20Sopenharmony_ci if (order == cc->search_order) { 12878c2ecf20Sopenharmony_ci cc->search_order--; 12888c2ecf20Sopenharmony_ci if (cc->search_order < 0) 12898c2ecf20Sopenharmony_ci cc->search_order = cc->order - 1; 12908c2ecf20Sopenharmony_ci return -1; 12918c2ecf20Sopenharmony_ci } 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci return order; 12948c2ecf20Sopenharmony_ci} 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_cistatic unsigned long 12978c2ecf20Sopenharmony_cifast_isolate_freepages(struct compact_control *cc) 12988c2ecf20Sopenharmony_ci{ 12998c2ecf20Sopenharmony_ci unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1); 13008c2ecf20Sopenharmony_ci unsigned int nr_scanned = 0; 13018c2ecf20Sopenharmony_ci unsigned long low_pfn, min_pfn, highest = 0; 13028c2ecf20Sopenharmony_ci unsigned long nr_isolated = 0; 13038c2ecf20Sopenharmony_ci unsigned long distance; 13048c2ecf20Sopenharmony_ci struct page *page = NULL; 13058c2ecf20Sopenharmony_ci bool scan_start = false; 13068c2ecf20Sopenharmony_ci int order; 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci /* Full compaction passes in a negative order */ 13098c2ecf20Sopenharmony_ci if (cc->order <= 0) 13108c2ecf20Sopenharmony_ci return 
cc->free_pfn; 13118c2ecf20Sopenharmony_ci 13128c2ecf20Sopenharmony_ci /* 13138c2ecf20Sopenharmony_ci * If starting the scan, use a deeper search and use the highest 13148c2ecf20Sopenharmony_ci * PFN found if a suitable one is not found. 13158c2ecf20Sopenharmony_ci */ 13168c2ecf20Sopenharmony_ci if (cc->free_pfn >= cc->zone->compact_init_free_pfn) { 13178c2ecf20Sopenharmony_ci limit = pageblock_nr_pages >> 1; 13188c2ecf20Sopenharmony_ci scan_start = true; 13198c2ecf20Sopenharmony_ci } 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_ci /* 13228c2ecf20Sopenharmony_ci * Preferred point is in the top quarter of the scan space but take 13238c2ecf20Sopenharmony_ci * a pfn from the top half if the search is problematic. 13248c2ecf20Sopenharmony_ci */ 13258c2ecf20Sopenharmony_ci distance = (cc->free_pfn - cc->migrate_pfn); 13268c2ecf20Sopenharmony_ci low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2)); 13278c2ecf20Sopenharmony_ci min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1)); 13288c2ecf20Sopenharmony_ci 13298c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(min_pfn > low_pfn)) 13308c2ecf20Sopenharmony_ci low_pfn = min_pfn; 13318c2ecf20Sopenharmony_ci 13328c2ecf20Sopenharmony_ci /* 13338c2ecf20Sopenharmony_ci * Search starts from the last successful isolation order or the next 13348c2ecf20Sopenharmony_ci * order to search after a previous failure 13358c2ecf20Sopenharmony_ci */ 13368c2ecf20Sopenharmony_ci cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order); 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci for (order = cc->search_order; 13398c2ecf20Sopenharmony_ci !page && order >= 0; 13408c2ecf20Sopenharmony_ci order = next_search_order(cc, order)) { 13418c2ecf20Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 13428c2ecf20Sopenharmony_ci struct list_head *freelist; 13438c2ecf20Sopenharmony_ci struct page *freepage; 13448c2ecf20Sopenharmony_ci unsigned long flags; 13458c2ecf20Sopenharmony_ci unsigned int 
order_scanned = 0; 13468c2ecf20Sopenharmony_ci unsigned long high_pfn = 0; 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci if (!area->nr_free) 13498c2ecf20Sopenharmony_ci continue; 13508c2ecf20Sopenharmony_ci 13518c2ecf20Sopenharmony_ci spin_lock_irqsave(&cc->zone->lock, flags); 13528c2ecf20Sopenharmony_ci freelist = &area->free_list[MIGRATE_MOVABLE]; 13538c2ecf20Sopenharmony_ci list_for_each_entry_reverse(freepage, freelist, lru) { 13548c2ecf20Sopenharmony_ci unsigned long pfn; 13558c2ecf20Sopenharmony_ci 13568c2ecf20Sopenharmony_ci order_scanned++; 13578c2ecf20Sopenharmony_ci nr_scanned++; 13588c2ecf20Sopenharmony_ci pfn = page_to_pfn(freepage); 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci if (pfn >= highest) 13618c2ecf20Sopenharmony_ci highest = max(pageblock_start_pfn(pfn), 13628c2ecf20Sopenharmony_ci cc->zone->zone_start_pfn); 13638c2ecf20Sopenharmony_ci 13648c2ecf20Sopenharmony_ci if (pfn >= low_pfn) { 13658c2ecf20Sopenharmony_ci cc->fast_search_fail = 0; 13668c2ecf20Sopenharmony_ci cc->search_order = order; 13678c2ecf20Sopenharmony_ci page = freepage; 13688c2ecf20Sopenharmony_ci break; 13698c2ecf20Sopenharmony_ci } 13708c2ecf20Sopenharmony_ci 13718c2ecf20Sopenharmony_ci if (pfn >= min_pfn && pfn > high_pfn) { 13728c2ecf20Sopenharmony_ci high_pfn = pfn; 13738c2ecf20Sopenharmony_ci 13748c2ecf20Sopenharmony_ci /* Shorten the scan if a candidate is found */ 13758c2ecf20Sopenharmony_ci limit >>= 1; 13768c2ecf20Sopenharmony_ci } 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_ci if (order_scanned >= limit) 13798c2ecf20Sopenharmony_ci break; 13808c2ecf20Sopenharmony_ci } 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci /* Use a minimum pfn if a preferred one was not found */ 13838c2ecf20Sopenharmony_ci if (!page && high_pfn) { 13848c2ecf20Sopenharmony_ci page = pfn_to_page(high_pfn); 13858c2ecf20Sopenharmony_ci 13868c2ecf20Sopenharmony_ci /* Update freepage for the list reorder below */ 13878c2ecf20Sopenharmony_ci freepage = page; 
13888c2ecf20Sopenharmony_ci } 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci /* Reorder to so a future search skips recent pages */ 13918c2ecf20Sopenharmony_ci move_freelist_head(freelist, freepage); 13928c2ecf20Sopenharmony_ci 13938c2ecf20Sopenharmony_ci /* Isolate the page if available */ 13948c2ecf20Sopenharmony_ci if (page) { 13958c2ecf20Sopenharmony_ci if (__isolate_free_page(page, order)) { 13968c2ecf20Sopenharmony_ci set_page_private(page, order); 13978c2ecf20Sopenharmony_ci nr_isolated = 1 << order; 13988c2ecf20Sopenharmony_ci cc->nr_freepages += nr_isolated; 13998c2ecf20Sopenharmony_ci list_add_tail(&page->lru, &cc->freepages); 14008c2ecf20Sopenharmony_ci count_compact_events(COMPACTISOLATED, nr_isolated); 14018c2ecf20Sopenharmony_ci } else { 14028c2ecf20Sopenharmony_ci /* If isolation fails, abort the search */ 14038c2ecf20Sopenharmony_ci order = cc->search_order + 1; 14048c2ecf20Sopenharmony_ci page = NULL; 14058c2ecf20Sopenharmony_ci } 14068c2ecf20Sopenharmony_ci } 14078c2ecf20Sopenharmony_ci 14088c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cc->zone->lock, flags); 14098c2ecf20Sopenharmony_ci 14108c2ecf20Sopenharmony_ci /* 14118c2ecf20Sopenharmony_ci * Smaller scan on next order so the total scan ig related 14128c2ecf20Sopenharmony_ci * to freelist_scan_limit. 14138c2ecf20Sopenharmony_ci */ 14148c2ecf20Sopenharmony_ci if (order_scanned >= limit) 14158c2ecf20Sopenharmony_ci limit = min(1U, limit >> 1); 14168c2ecf20Sopenharmony_ci } 14178c2ecf20Sopenharmony_ci 14188c2ecf20Sopenharmony_ci if (!page) { 14198c2ecf20Sopenharmony_ci cc->fast_search_fail++; 14208c2ecf20Sopenharmony_ci if (scan_start) { 14218c2ecf20Sopenharmony_ci /* 14228c2ecf20Sopenharmony_ci * Use the highest PFN found above min. If one was 14238c2ecf20Sopenharmony_ci * not found, be pessimistic for direct compaction 14248c2ecf20Sopenharmony_ci * and use the min mark. 
14258c2ecf20Sopenharmony_ci */ 14268c2ecf20Sopenharmony_ci if (highest) { 14278c2ecf20Sopenharmony_ci page = pfn_to_page(highest); 14288c2ecf20Sopenharmony_ci cc->free_pfn = highest; 14298c2ecf20Sopenharmony_ci } else { 14308c2ecf20Sopenharmony_ci if (cc->direct_compaction && pfn_valid(min_pfn)) { 14318c2ecf20Sopenharmony_ci page = pageblock_pfn_to_page(min_pfn, 14328c2ecf20Sopenharmony_ci min(pageblock_end_pfn(min_pfn), 14338c2ecf20Sopenharmony_ci zone_end_pfn(cc->zone)), 14348c2ecf20Sopenharmony_ci cc->zone); 14358c2ecf20Sopenharmony_ci cc->free_pfn = min_pfn; 14368c2ecf20Sopenharmony_ci } 14378c2ecf20Sopenharmony_ci } 14388c2ecf20Sopenharmony_ci } 14398c2ecf20Sopenharmony_ci } 14408c2ecf20Sopenharmony_ci 14418c2ecf20Sopenharmony_ci if (highest && highest >= cc->zone->compact_cached_free_pfn) { 14428c2ecf20Sopenharmony_ci highest -= pageblock_nr_pages; 14438c2ecf20Sopenharmony_ci cc->zone->compact_cached_free_pfn = highest; 14448c2ecf20Sopenharmony_ci } 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci cc->total_free_scanned += nr_scanned; 14478c2ecf20Sopenharmony_ci if (!page) 14488c2ecf20Sopenharmony_ci return cc->free_pfn; 14498c2ecf20Sopenharmony_ci 14508c2ecf20Sopenharmony_ci low_pfn = page_to_pfn(page); 14518c2ecf20Sopenharmony_ci fast_isolate_around(cc, low_pfn); 14528c2ecf20Sopenharmony_ci return low_pfn; 14538c2ecf20Sopenharmony_ci} 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci/* 14568c2ecf20Sopenharmony_ci * Based on information in the current compact_control, find blocks 14578c2ecf20Sopenharmony_ci * suitable for isolating free pages from and then isolate them. 
14588c2ecf20Sopenharmony_ci */ 14598c2ecf20Sopenharmony_cistatic void isolate_freepages(struct compact_control *cc) 14608c2ecf20Sopenharmony_ci{ 14618c2ecf20Sopenharmony_ci struct zone *zone = cc->zone; 14628c2ecf20Sopenharmony_ci struct page *page; 14638c2ecf20Sopenharmony_ci unsigned long block_start_pfn; /* start of current pageblock */ 14648c2ecf20Sopenharmony_ci unsigned long isolate_start_pfn; /* exact pfn we start at */ 14658c2ecf20Sopenharmony_ci unsigned long block_end_pfn; /* end of current pageblock */ 14668c2ecf20Sopenharmony_ci unsigned long low_pfn; /* lowest pfn scanner is able to scan */ 14678c2ecf20Sopenharmony_ci struct list_head *freelist = &cc->freepages; 14688c2ecf20Sopenharmony_ci unsigned int stride; 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci /* Try a small search of the free lists for a candidate */ 14718c2ecf20Sopenharmony_ci isolate_start_pfn = fast_isolate_freepages(cc); 14728c2ecf20Sopenharmony_ci if (cc->nr_freepages) 14738c2ecf20Sopenharmony_ci goto splitmap; 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_ci /* 14768c2ecf20Sopenharmony_ci * Initialise the free scanner. The starting point is where we last 14778c2ecf20Sopenharmony_ci * successfully isolated from, zone-cached value, or the end of the 14788c2ecf20Sopenharmony_ci * zone when isolating for the first time. For looping we also need 14798c2ecf20Sopenharmony_ci * this pfn aligned down to the pageblock boundary, because we do 14808c2ecf20Sopenharmony_ci * block_start_pfn -= pageblock_nr_pages in the for loop. 14818c2ecf20Sopenharmony_ci * For ending point, take care when isolating in last pageblock of a 14828c2ecf20Sopenharmony_ci * zone which ends in the middle of a pageblock. 14838c2ecf20Sopenharmony_ci * The low boundary is the end of the pageblock the migration scanner 14848c2ecf20Sopenharmony_ci * is using. 
14858c2ecf20Sopenharmony_ci */ 14868c2ecf20Sopenharmony_ci isolate_start_pfn = cc->free_pfn; 14878c2ecf20Sopenharmony_ci block_start_pfn = pageblock_start_pfn(isolate_start_pfn); 14888c2ecf20Sopenharmony_ci block_end_pfn = min(block_start_pfn + pageblock_nr_pages, 14898c2ecf20Sopenharmony_ci zone_end_pfn(zone)); 14908c2ecf20Sopenharmony_ci low_pfn = pageblock_end_pfn(cc->migrate_pfn); 14918c2ecf20Sopenharmony_ci stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1; 14928c2ecf20Sopenharmony_ci 14938c2ecf20Sopenharmony_ci /* 14948c2ecf20Sopenharmony_ci * Isolate free pages until enough are available to migrate the 14958c2ecf20Sopenharmony_ci * pages on cc->migratepages. We stop searching if the migrate 14968c2ecf20Sopenharmony_ci * and free page scanners meet or enough free pages are isolated. 14978c2ecf20Sopenharmony_ci */ 14988c2ecf20Sopenharmony_ci for (; block_start_pfn >= low_pfn; 14998c2ecf20Sopenharmony_ci block_end_pfn = block_start_pfn, 15008c2ecf20Sopenharmony_ci block_start_pfn -= pageblock_nr_pages, 15018c2ecf20Sopenharmony_ci isolate_start_pfn = block_start_pfn) { 15028c2ecf20Sopenharmony_ci unsigned long nr_isolated; 15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci /* 15058c2ecf20Sopenharmony_ci * This can iterate a massively long zone without finding any 15068c2ecf20Sopenharmony_ci * suitable migration targets, so periodically check resched. 
15078c2ecf20Sopenharmony_ci */ 15088c2ecf20Sopenharmony_ci if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))) 15098c2ecf20Sopenharmony_ci cond_resched(); 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn, 15128c2ecf20Sopenharmony_ci zone); 15138c2ecf20Sopenharmony_ci if (!page) 15148c2ecf20Sopenharmony_ci continue; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci /* Check the block is suitable for migration */ 15178c2ecf20Sopenharmony_ci if (!suitable_migration_target(cc, page)) 15188c2ecf20Sopenharmony_ci continue; 15198c2ecf20Sopenharmony_ci 15208c2ecf20Sopenharmony_ci /* If isolation recently failed, do not retry */ 15218c2ecf20Sopenharmony_ci if (!isolation_suitable(cc, page)) 15228c2ecf20Sopenharmony_ci continue; 15238c2ecf20Sopenharmony_ci 15248c2ecf20Sopenharmony_ci /* Found a block suitable for isolating free pages from. */ 15258c2ecf20Sopenharmony_ci nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn, 15268c2ecf20Sopenharmony_ci block_end_pfn, freelist, stride, false); 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci /* Update the skip hint if the full pageblock was scanned */ 15298c2ecf20Sopenharmony_ci if (isolate_start_pfn == block_end_pfn) 15308c2ecf20Sopenharmony_ci update_pageblock_skip(cc, page, block_start_pfn); 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_ci /* Are enough freepages isolated? */ 15338c2ecf20Sopenharmony_ci if (cc->nr_freepages >= cc->nr_migratepages) { 15348c2ecf20Sopenharmony_ci if (isolate_start_pfn >= block_end_pfn) { 15358c2ecf20Sopenharmony_ci /* 15368c2ecf20Sopenharmony_ci * Restart at previous pageblock if more 15378c2ecf20Sopenharmony_ci * freepages can be isolated next time. 
15388c2ecf20Sopenharmony_ci */ 15398c2ecf20Sopenharmony_ci isolate_start_pfn = 15408c2ecf20Sopenharmony_ci block_start_pfn - pageblock_nr_pages; 15418c2ecf20Sopenharmony_ci } 15428c2ecf20Sopenharmony_ci break; 15438c2ecf20Sopenharmony_ci } else if (isolate_start_pfn < block_end_pfn) { 15448c2ecf20Sopenharmony_ci /* 15458c2ecf20Sopenharmony_ci * If isolation failed early, do not continue 15468c2ecf20Sopenharmony_ci * needlessly. 15478c2ecf20Sopenharmony_ci */ 15488c2ecf20Sopenharmony_ci break; 15498c2ecf20Sopenharmony_ci } 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_ci /* Adjust stride depending on isolation */ 15528c2ecf20Sopenharmony_ci if (nr_isolated) { 15538c2ecf20Sopenharmony_ci stride = 1; 15548c2ecf20Sopenharmony_ci continue; 15558c2ecf20Sopenharmony_ci } 15568c2ecf20Sopenharmony_ci stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1); 15578c2ecf20Sopenharmony_ci } 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci /* 15608c2ecf20Sopenharmony_ci * Record where the free scanner will restart next time. Either we 15618c2ecf20Sopenharmony_ci * broke from the loop and set isolate_start_pfn based on the last 15628c2ecf20Sopenharmony_ci * call to isolate_freepages_block(), or we met the migration scanner 15638c2ecf20Sopenharmony_ci * and the loop terminated due to isolate_start_pfn < low_pfn 15648c2ecf20Sopenharmony_ci */ 15658c2ecf20Sopenharmony_ci cc->free_pfn = isolate_start_pfn; 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_cisplitmap: 15688c2ecf20Sopenharmony_ci /* __isolate_free_page() does not map the pages */ 15698c2ecf20Sopenharmony_ci split_map_pages(freelist); 15708c2ecf20Sopenharmony_ci} 15718c2ecf20Sopenharmony_ci 15728c2ecf20Sopenharmony_ci/* 15738c2ecf20Sopenharmony_ci * This is a migrate-callback that "allocates" freepages by taking pages 15748c2ecf20Sopenharmony_ci * from the isolated freelists in the block we are migrating to. 
15758c2ecf20Sopenharmony_ci */ 15768c2ecf20Sopenharmony_cistatic struct page *compaction_alloc(struct page *migratepage, 15778c2ecf20Sopenharmony_ci unsigned long data) 15788c2ecf20Sopenharmony_ci{ 15798c2ecf20Sopenharmony_ci struct compact_control *cc = (struct compact_control *)data; 15808c2ecf20Sopenharmony_ci struct page *freepage; 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci if (list_empty(&cc->freepages)) { 15838c2ecf20Sopenharmony_ci isolate_freepages(cc); 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci if (list_empty(&cc->freepages)) 15868c2ecf20Sopenharmony_ci return NULL; 15878c2ecf20Sopenharmony_ci } 15888c2ecf20Sopenharmony_ci 15898c2ecf20Sopenharmony_ci freepage = list_entry(cc->freepages.next, struct page, lru); 15908c2ecf20Sopenharmony_ci list_del(&freepage->lru); 15918c2ecf20Sopenharmony_ci cc->nr_freepages--; 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_ci return freepage; 15948c2ecf20Sopenharmony_ci} 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci/* 15978c2ecf20Sopenharmony_ci * This is a migrate-callback that "frees" freepages back to the isolated 15988c2ecf20Sopenharmony_ci * freelist. All pages on the freelist are from the same zone, so there is no 15998c2ecf20Sopenharmony_ci * special handling needed for NUMA. 
16008c2ecf20Sopenharmony_ci */ 16018c2ecf20Sopenharmony_cistatic void compaction_free(struct page *page, unsigned long data) 16028c2ecf20Sopenharmony_ci{ 16038c2ecf20Sopenharmony_ci struct compact_control *cc = (struct compact_control *)data; 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci list_add(&page->lru, &cc->freepages); 16068c2ecf20Sopenharmony_ci cc->nr_freepages++; 16078c2ecf20Sopenharmony_ci} 16088c2ecf20Sopenharmony_ci 16098c2ecf20Sopenharmony_ci/* possible outcome of isolate_migratepages */ 16108c2ecf20Sopenharmony_citypedef enum { 16118c2ecf20Sopenharmony_ci ISOLATE_ABORT, /* Abort compaction now */ 16128c2ecf20Sopenharmony_ci ISOLATE_NONE, /* No pages isolated, continue scanning */ 16138c2ecf20Sopenharmony_ci ISOLATE_SUCCESS, /* Pages isolated, migrate */ 16148c2ecf20Sopenharmony_ci} isolate_migrate_t; 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_ci/* 16178c2ecf20Sopenharmony_ci * Allow userspace to control policy on scanning the unevictable LRU for 16188c2ecf20Sopenharmony_ci * compactable pages. 
16198c2ecf20Sopenharmony_ci */ 16208c2ecf20Sopenharmony_ci#ifdef CONFIG_PREEMPT_RT 16218c2ecf20Sopenharmony_ciint sysctl_compact_unevictable_allowed __read_mostly = 0; 16228c2ecf20Sopenharmony_ci#else 16238c2ecf20Sopenharmony_ciint sysctl_compact_unevictable_allowed __read_mostly = 1; 16248c2ecf20Sopenharmony_ci#endif 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_cistatic inline void 16278c2ecf20Sopenharmony_ciupdate_fast_start_pfn(struct compact_control *cc, unsigned long pfn) 16288c2ecf20Sopenharmony_ci{ 16298c2ecf20Sopenharmony_ci if (cc->fast_start_pfn == ULONG_MAX) 16308c2ecf20Sopenharmony_ci return; 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci if (!cc->fast_start_pfn) 16338c2ecf20Sopenharmony_ci cc->fast_start_pfn = pfn; 16348c2ecf20Sopenharmony_ci 16358c2ecf20Sopenharmony_ci cc->fast_start_pfn = min(cc->fast_start_pfn, pfn); 16368c2ecf20Sopenharmony_ci} 16378c2ecf20Sopenharmony_ci 16388c2ecf20Sopenharmony_cistatic inline unsigned long 16398c2ecf20Sopenharmony_cireinit_migrate_pfn(struct compact_control *cc) 16408c2ecf20Sopenharmony_ci{ 16418c2ecf20Sopenharmony_ci if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX) 16428c2ecf20Sopenharmony_ci return cc->migrate_pfn; 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci cc->migrate_pfn = cc->fast_start_pfn; 16458c2ecf20Sopenharmony_ci cc->fast_start_pfn = ULONG_MAX; 16468c2ecf20Sopenharmony_ci 16478c2ecf20Sopenharmony_ci return cc->migrate_pfn; 16488c2ecf20Sopenharmony_ci} 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ci/* 16518c2ecf20Sopenharmony_ci * Briefly search the free lists for a migration source that already has 16528c2ecf20Sopenharmony_ci * some free pages to reduce the number of pages that need migration 16538c2ecf20Sopenharmony_ci * before a pageblock is free. 
16548c2ecf20Sopenharmony_ci */ 16558c2ecf20Sopenharmony_cistatic unsigned long fast_find_migrateblock(struct compact_control *cc) 16568c2ecf20Sopenharmony_ci{ 16578c2ecf20Sopenharmony_ci unsigned int limit = freelist_scan_limit(cc); 16588c2ecf20Sopenharmony_ci unsigned int nr_scanned = 0; 16598c2ecf20Sopenharmony_ci unsigned long distance; 16608c2ecf20Sopenharmony_ci unsigned long pfn = cc->migrate_pfn; 16618c2ecf20Sopenharmony_ci unsigned long high_pfn; 16628c2ecf20Sopenharmony_ci int order; 16638c2ecf20Sopenharmony_ci bool found_block = false; 16648c2ecf20Sopenharmony_ci 16658c2ecf20Sopenharmony_ci /* Skip hints are relied on to avoid repeats on the fast search */ 16668c2ecf20Sopenharmony_ci if (cc->ignore_skip_hint) 16678c2ecf20Sopenharmony_ci return pfn; 16688c2ecf20Sopenharmony_ci 16698c2ecf20Sopenharmony_ci /* 16708c2ecf20Sopenharmony_ci * If the migrate_pfn is not at the start of a zone or the start 16718c2ecf20Sopenharmony_ci * of a pageblock then assume this is a continuation of a previous 16728c2ecf20Sopenharmony_ci * scan restarted due to COMPACT_CLUSTER_MAX. 16738c2ecf20Sopenharmony_ci */ 16748c2ecf20Sopenharmony_ci if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn)) 16758c2ecf20Sopenharmony_ci return pfn; 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci /* 16788c2ecf20Sopenharmony_ci * For smaller orders, just linearly scan as the number of pages 16798c2ecf20Sopenharmony_ci * to migrate should be relatively small and does not necessarily 16808c2ecf20Sopenharmony_ci * justify freeing up a large block for a small allocation. 16818c2ecf20Sopenharmony_ci */ 16828c2ecf20Sopenharmony_ci if (cc->order <= PAGE_ALLOC_COSTLY_ORDER) 16838c2ecf20Sopenharmony_ci return pfn; 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_ci /* 16868c2ecf20Sopenharmony_ci * Only allow kcompactd and direct requests for movable pages to 16878c2ecf20Sopenharmony_ci * quickly clear out a MOVABLE pageblock for allocation. 
This 16888c2ecf20Sopenharmony_ci * reduces the risk that a large movable pageblock is freed for 16898c2ecf20Sopenharmony_ci * an unmovable/reclaimable small allocation. 16908c2ecf20Sopenharmony_ci */ 16918c2ecf20Sopenharmony_ci if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE) 16928c2ecf20Sopenharmony_ci return pfn; 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci /* 16958c2ecf20Sopenharmony_ci * When starting the migration scanner, pick any pageblock within the 16968c2ecf20Sopenharmony_ci * first half of the search space. Otherwise try and pick a pageblock 16978c2ecf20Sopenharmony_ci * within the first eighth to reduce the chances that a migration 16988c2ecf20Sopenharmony_ci * target later becomes a source. 16998c2ecf20Sopenharmony_ci */ 17008c2ecf20Sopenharmony_ci distance = (cc->free_pfn - cc->migrate_pfn) >> 1; 17018c2ecf20Sopenharmony_ci if (cc->migrate_pfn != cc->zone->zone_start_pfn) 17028c2ecf20Sopenharmony_ci distance >>= 2; 17038c2ecf20Sopenharmony_ci high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance); 17048c2ecf20Sopenharmony_ci 17058c2ecf20Sopenharmony_ci for (order = cc->order - 1; 17068c2ecf20Sopenharmony_ci order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit; 17078c2ecf20Sopenharmony_ci order--) { 17088c2ecf20Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 17098c2ecf20Sopenharmony_ci struct list_head *freelist; 17108c2ecf20Sopenharmony_ci unsigned long flags; 17118c2ecf20Sopenharmony_ci struct page *freepage; 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_ci if (!area->nr_free) 17148c2ecf20Sopenharmony_ci continue; 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci spin_lock_irqsave(&cc->zone->lock, flags); 17178c2ecf20Sopenharmony_ci freelist = &area->free_list[MIGRATE_MOVABLE]; 17188c2ecf20Sopenharmony_ci list_for_each_entry(freepage, freelist, lru) { 17198c2ecf20Sopenharmony_ci unsigned long free_pfn; 17208c2ecf20Sopenharmony_ci 17218c2ecf20Sopenharmony_ci if 
(nr_scanned++ >= limit) { 17228c2ecf20Sopenharmony_ci move_freelist_tail(freelist, freepage); 17238c2ecf20Sopenharmony_ci break; 17248c2ecf20Sopenharmony_ci } 17258c2ecf20Sopenharmony_ci 17268c2ecf20Sopenharmony_ci free_pfn = page_to_pfn(freepage); 17278c2ecf20Sopenharmony_ci if (free_pfn < high_pfn) { 17288c2ecf20Sopenharmony_ci /* 17298c2ecf20Sopenharmony_ci * Avoid if skipped recently. Ideally it would 17308c2ecf20Sopenharmony_ci * move to the tail but even safe iteration of 17318c2ecf20Sopenharmony_ci * the list assumes an entry is deleted, not 17328c2ecf20Sopenharmony_ci * reordered. 17338c2ecf20Sopenharmony_ci */ 17348c2ecf20Sopenharmony_ci if (get_pageblock_skip(freepage)) 17358c2ecf20Sopenharmony_ci continue; 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci /* Reorder to so a future search skips recent pages */ 17388c2ecf20Sopenharmony_ci move_freelist_tail(freelist, freepage); 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci update_fast_start_pfn(cc, free_pfn); 17418c2ecf20Sopenharmony_ci pfn = pageblock_start_pfn(free_pfn); 17428c2ecf20Sopenharmony_ci if (pfn < cc->zone->zone_start_pfn) 17438c2ecf20Sopenharmony_ci pfn = cc->zone->zone_start_pfn; 17448c2ecf20Sopenharmony_ci cc->fast_search_fail = 0; 17458c2ecf20Sopenharmony_ci found_block = true; 17468c2ecf20Sopenharmony_ci set_pageblock_skip(freepage); 17478c2ecf20Sopenharmony_ci break; 17488c2ecf20Sopenharmony_ci } 17498c2ecf20Sopenharmony_ci } 17508c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cc->zone->lock, flags); 17518c2ecf20Sopenharmony_ci } 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci cc->total_migrate_scanned += nr_scanned; 17548c2ecf20Sopenharmony_ci 17558c2ecf20Sopenharmony_ci /* 17568c2ecf20Sopenharmony_ci * If fast scanning failed then use a cached entry for a page block 17578c2ecf20Sopenharmony_ci * that had free pages as the basis for starting a linear scan. 
17588c2ecf20Sopenharmony_ci */ 17598c2ecf20Sopenharmony_ci if (!found_block) { 17608c2ecf20Sopenharmony_ci cc->fast_search_fail++; 17618c2ecf20Sopenharmony_ci pfn = reinit_migrate_pfn(cc); 17628c2ecf20Sopenharmony_ci } 17638c2ecf20Sopenharmony_ci return pfn; 17648c2ecf20Sopenharmony_ci} 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci/* 17678c2ecf20Sopenharmony_ci * Isolate all pages that can be migrated from the first suitable block, 17688c2ecf20Sopenharmony_ci * starting at the block pointed to by the migrate scanner pfn within 17698c2ecf20Sopenharmony_ci * compact_control. 17708c2ecf20Sopenharmony_ci */ 17718c2ecf20Sopenharmony_cistatic isolate_migrate_t isolate_migratepages(struct compact_control *cc) 17728c2ecf20Sopenharmony_ci{ 17738c2ecf20Sopenharmony_ci unsigned long block_start_pfn; 17748c2ecf20Sopenharmony_ci unsigned long block_end_pfn; 17758c2ecf20Sopenharmony_ci unsigned long low_pfn; 17768c2ecf20Sopenharmony_ci struct page *page; 17778c2ecf20Sopenharmony_ci const isolate_mode_t isolate_mode = 17788c2ecf20Sopenharmony_ci (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) | 17798c2ecf20Sopenharmony_ci (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0); 17808c2ecf20Sopenharmony_ci bool fast_find_block; 17818c2ecf20Sopenharmony_ci 17828c2ecf20Sopenharmony_ci /* 17838c2ecf20Sopenharmony_ci * Start at where we last stopped, or beginning of the zone as 17848c2ecf20Sopenharmony_ci * initialized by compact_zone(). The first failure will use 17858c2ecf20Sopenharmony_ci * the lowest PFN as the starting point for linear scanning. 
17868c2ecf20Sopenharmony_ci */ 17878c2ecf20Sopenharmony_ci low_pfn = fast_find_migrateblock(cc); 17888c2ecf20Sopenharmony_ci block_start_pfn = pageblock_start_pfn(low_pfn); 17898c2ecf20Sopenharmony_ci if (block_start_pfn < cc->zone->zone_start_pfn) 17908c2ecf20Sopenharmony_ci block_start_pfn = cc->zone->zone_start_pfn; 17918c2ecf20Sopenharmony_ci 17928c2ecf20Sopenharmony_ci /* 17938c2ecf20Sopenharmony_ci * fast_find_migrateblock marks a pageblock skipped so to avoid 17948c2ecf20Sopenharmony_ci * the isolation_suitable check below, check whether the fast 17958c2ecf20Sopenharmony_ci * search was successful. 17968c2ecf20Sopenharmony_ci */ 17978c2ecf20Sopenharmony_ci fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail; 17988c2ecf20Sopenharmony_ci 17998c2ecf20Sopenharmony_ci /* Only scan within a pageblock boundary */ 18008c2ecf20Sopenharmony_ci block_end_pfn = pageblock_end_pfn(low_pfn); 18018c2ecf20Sopenharmony_ci 18028c2ecf20Sopenharmony_ci /* 18038c2ecf20Sopenharmony_ci * Iterate over whole pageblocks until we find the first suitable. 18048c2ecf20Sopenharmony_ci * Do not cross the free scanner. 18058c2ecf20Sopenharmony_ci */ 18068c2ecf20Sopenharmony_ci for (; block_end_pfn <= cc->free_pfn; 18078c2ecf20Sopenharmony_ci fast_find_block = false, 18088c2ecf20Sopenharmony_ci low_pfn = block_end_pfn, 18098c2ecf20Sopenharmony_ci block_start_pfn = block_end_pfn, 18108c2ecf20Sopenharmony_ci block_end_pfn += pageblock_nr_pages) { 18118c2ecf20Sopenharmony_ci 18128c2ecf20Sopenharmony_ci /* 18138c2ecf20Sopenharmony_ci * This can potentially iterate a massively long zone with 18148c2ecf20Sopenharmony_ci * many pageblocks unsuitable, so periodically check if we 18158c2ecf20Sopenharmony_ci * need to schedule. 
18168c2ecf20Sopenharmony_ci */ 18178c2ecf20Sopenharmony_ci if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))) 18188c2ecf20Sopenharmony_ci cond_resched(); 18198c2ecf20Sopenharmony_ci 18208c2ecf20Sopenharmony_ci page = pageblock_pfn_to_page(block_start_pfn, 18218c2ecf20Sopenharmony_ci block_end_pfn, cc->zone); 18228c2ecf20Sopenharmony_ci if (!page) 18238c2ecf20Sopenharmony_ci continue; 18248c2ecf20Sopenharmony_ci 18258c2ecf20Sopenharmony_ci /* 18268c2ecf20Sopenharmony_ci * If isolation recently failed, do not retry. Only check the 18278c2ecf20Sopenharmony_ci * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock 18288c2ecf20Sopenharmony_ci * to be visited multiple times. Assume skip was checked 18298c2ecf20Sopenharmony_ci * before making it "skip" so other compaction instances do 18308c2ecf20Sopenharmony_ci * not scan the same block. 18318c2ecf20Sopenharmony_ci */ 18328c2ecf20Sopenharmony_ci if (IS_ALIGNED(low_pfn, pageblock_nr_pages) && 18338c2ecf20Sopenharmony_ci !fast_find_block && !isolation_suitable(cc, page)) 18348c2ecf20Sopenharmony_ci continue; 18358c2ecf20Sopenharmony_ci 18368c2ecf20Sopenharmony_ci /* 18378c2ecf20Sopenharmony_ci * For async compaction, also only scan in MOVABLE blocks 18388c2ecf20Sopenharmony_ci * without huge pages. Async compaction is optimistic to see 18398c2ecf20Sopenharmony_ci * if the minimum amount of work satisfies the allocation. 18408c2ecf20Sopenharmony_ci * The cached PFN is updated as it's possible that all 18418c2ecf20Sopenharmony_ci * remaining blocks between source and target are unsuitable 18428c2ecf20Sopenharmony_ci * and the compaction scanners fail to meet. 
18438c2ecf20Sopenharmony_ci */ 18448c2ecf20Sopenharmony_ci if (!suitable_migration_source(cc, page)) { 18458c2ecf20Sopenharmony_ci update_cached_migrate(cc, block_end_pfn); 18468c2ecf20Sopenharmony_ci continue; 18478c2ecf20Sopenharmony_ci } 18488c2ecf20Sopenharmony_ci 18498c2ecf20Sopenharmony_ci /* Perform the isolation */ 18508c2ecf20Sopenharmony_ci low_pfn = isolate_migratepages_block(cc, low_pfn, 18518c2ecf20Sopenharmony_ci block_end_pfn, isolate_mode); 18528c2ecf20Sopenharmony_ci 18538c2ecf20Sopenharmony_ci if (!low_pfn) 18548c2ecf20Sopenharmony_ci return ISOLATE_ABORT; 18558c2ecf20Sopenharmony_ci 18568c2ecf20Sopenharmony_ci /* 18578c2ecf20Sopenharmony_ci * Either we isolated something and proceed with migration. Or 18588c2ecf20Sopenharmony_ci * we failed and compact_zone should decide if we should 18598c2ecf20Sopenharmony_ci * continue or not. 18608c2ecf20Sopenharmony_ci */ 18618c2ecf20Sopenharmony_ci break; 18628c2ecf20Sopenharmony_ci } 18638c2ecf20Sopenharmony_ci 18648c2ecf20Sopenharmony_ci /* Record where migration scanner will be restarted. */ 18658c2ecf20Sopenharmony_ci cc->migrate_pfn = low_pfn; 18668c2ecf20Sopenharmony_ci 18678c2ecf20Sopenharmony_ci return cc->nr_migratepages ? 
ISOLATE_SUCCESS : ISOLATE_NONE; 18688c2ecf20Sopenharmony_ci} 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_ci/* 18718c2ecf20Sopenharmony_ci * order == -1 is expected when compacting via 18728c2ecf20Sopenharmony_ci * /proc/sys/vm/compact_memory 18738c2ecf20Sopenharmony_ci */ 18748c2ecf20Sopenharmony_cistatic inline bool is_via_compact_memory(int order) 18758c2ecf20Sopenharmony_ci{ 18768c2ecf20Sopenharmony_ci return order == -1; 18778c2ecf20Sopenharmony_ci} 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_cistatic bool kswapd_is_running(pg_data_t *pgdat) 18808c2ecf20Sopenharmony_ci{ 18818c2ecf20Sopenharmony_ci return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING); 18828c2ecf20Sopenharmony_ci} 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_ci/* 18858c2ecf20Sopenharmony_ci * A zone's fragmentation score is the external fragmentation wrt to the 18868c2ecf20Sopenharmony_ci * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value 18878c2ecf20Sopenharmony_ci * in the range [0, 100]. 18888c2ecf20Sopenharmony_ci * 18898c2ecf20Sopenharmony_ci * The scaling factor ensures that proactive compaction focuses on larger 18908c2ecf20Sopenharmony_ci * zones like ZONE_NORMAL, rather than smaller, specialized zones like 18918c2ecf20Sopenharmony_ci * ZONE_DMA32. For smaller zones, the score value remains close to zero, 18928c2ecf20Sopenharmony_ci * and thus never exceeds the high threshold for proactive compaction. 
18938c2ecf20Sopenharmony_ci */ 18948c2ecf20Sopenharmony_cistatic unsigned int fragmentation_score_zone(struct zone *zone) 18958c2ecf20Sopenharmony_ci{ 18968c2ecf20Sopenharmony_ci unsigned long score; 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci score = zone->present_pages * 18998c2ecf20Sopenharmony_ci extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); 19008c2ecf20Sopenharmony_ci return div64_ul(score, zone->zone_pgdat->node_present_pages + 1); 19018c2ecf20Sopenharmony_ci} 19028c2ecf20Sopenharmony_ci 19038c2ecf20Sopenharmony_ci/* 19048c2ecf20Sopenharmony_ci * The per-node proactive (background) compaction process is started by its 19058c2ecf20Sopenharmony_ci * corresponding kcompactd thread when the node's fragmentation score 19068c2ecf20Sopenharmony_ci * exceeds the high threshold. The compaction process remains active till 19078c2ecf20Sopenharmony_ci * the node's score falls below the low threshold, or one of the back-off 19088c2ecf20Sopenharmony_ci * conditions is met. 19098c2ecf20Sopenharmony_ci */ 19108c2ecf20Sopenharmony_cistatic unsigned int fragmentation_score_node(pg_data_t *pgdat) 19118c2ecf20Sopenharmony_ci{ 19128c2ecf20Sopenharmony_ci unsigned int score = 0; 19138c2ecf20Sopenharmony_ci int zoneid; 19148c2ecf20Sopenharmony_ci 19158c2ecf20Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 19168c2ecf20Sopenharmony_ci struct zone *zone; 19178c2ecf20Sopenharmony_ci 19188c2ecf20Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 19198c2ecf20Sopenharmony_ci score += fragmentation_score_zone(zone); 19208c2ecf20Sopenharmony_ci } 19218c2ecf20Sopenharmony_ci 19228c2ecf20Sopenharmony_ci return score; 19238c2ecf20Sopenharmony_ci} 19248c2ecf20Sopenharmony_ci 19258c2ecf20Sopenharmony_cistatic unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low) 19268c2ecf20Sopenharmony_ci{ 19278c2ecf20Sopenharmony_ci unsigned int wmark_low; 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_ci /* 19308c2ecf20Sopenharmony_ci * Cap the low 
watermak to avoid excessive compaction 19318c2ecf20Sopenharmony_ci * activity in case a user sets the proactivess tunable 19328c2ecf20Sopenharmony_ci * close to 100 (maximum). 19338c2ecf20Sopenharmony_ci */ 19348c2ecf20Sopenharmony_ci wmark_low = max(100U - sysctl_compaction_proactiveness, 5U); 19358c2ecf20Sopenharmony_ci return low ? wmark_low : min(wmark_low + 10, 100U); 19368c2ecf20Sopenharmony_ci} 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_cistatic bool should_proactive_compact_node(pg_data_t *pgdat) 19398c2ecf20Sopenharmony_ci{ 19408c2ecf20Sopenharmony_ci int wmark_high; 19418c2ecf20Sopenharmony_ci 19428c2ecf20Sopenharmony_ci if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat)) 19438c2ecf20Sopenharmony_ci return false; 19448c2ecf20Sopenharmony_ci 19458c2ecf20Sopenharmony_ci wmark_high = fragmentation_score_wmark(pgdat, false); 19468c2ecf20Sopenharmony_ci return fragmentation_score_node(pgdat) > wmark_high; 19478c2ecf20Sopenharmony_ci} 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_cistatic enum compact_result __compact_finished(struct compact_control *cc) 19508c2ecf20Sopenharmony_ci{ 19518c2ecf20Sopenharmony_ci unsigned int order; 19528c2ecf20Sopenharmony_ci const int migratetype = cc->migratetype; 19538c2ecf20Sopenharmony_ci int ret; 19548c2ecf20Sopenharmony_ci 19558c2ecf20Sopenharmony_ci /* Compaction run completes if the migrate and free scanner meet */ 19568c2ecf20Sopenharmony_ci if (compact_scanners_met(cc)) { 19578c2ecf20Sopenharmony_ci /* Let the next compaction start anew. */ 19588c2ecf20Sopenharmony_ci reset_cached_positions(cc->zone); 19598c2ecf20Sopenharmony_ci 19608c2ecf20Sopenharmony_ci /* 19618c2ecf20Sopenharmony_ci * Mark that the PG_migrate_skip information should be cleared 19628c2ecf20Sopenharmony_ci * by kswapd when it goes to sleep. kcompactd does not set the 19638c2ecf20Sopenharmony_ci * flag itself as the decision to be clear should be directly 19648c2ecf20Sopenharmony_ci * based on an allocation request. 
19658c2ecf20Sopenharmony_ci */ 19668c2ecf20Sopenharmony_ci if (cc->direct_compaction) 19678c2ecf20Sopenharmony_ci cc->zone->compact_blockskip_flush = true; 19688c2ecf20Sopenharmony_ci 19698c2ecf20Sopenharmony_ci if (cc->whole_zone) 19708c2ecf20Sopenharmony_ci return COMPACT_COMPLETE; 19718c2ecf20Sopenharmony_ci else 19728c2ecf20Sopenharmony_ci return COMPACT_PARTIAL_SKIPPED; 19738c2ecf20Sopenharmony_ci } 19748c2ecf20Sopenharmony_ci 19758c2ecf20Sopenharmony_ci if (cc->proactive_compaction) { 19768c2ecf20Sopenharmony_ci int score, wmark_low; 19778c2ecf20Sopenharmony_ci pg_data_t *pgdat; 19788c2ecf20Sopenharmony_ci 19798c2ecf20Sopenharmony_ci pgdat = cc->zone->zone_pgdat; 19808c2ecf20Sopenharmony_ci if (kswapd_is_running(pgdat)) 19818c2ecf20Sopenharmony_ci return COMPACT_PARTIAL_SKIPPED; 19828c2ecf20Sopenharmony_ci 19838c2ecf20Sopenharmony_ci score = fragmentation_score_zone(cc->zone); 19848c2ecf20Sopenharmony_ci wmark_low = fragmentation_score_wmark(pgdat, true); 19858c2ecf20Sopenharmony_ci 19868c2ecf20Sopenharmony_ci if (score > wmark_low) 19878c2ecf20Sopenharmony_ci ret = COMPACT_CONTINUE; 19888c2ecf20Sopenharmony_ci else 19898c2ecf20Sopenharmony_ci ret = COMPACT_SUCCESS; 19908c2ecf20Sopenharmony_ci 19918c2ecf20Sopenharmony_ci goto out; 19928c2ecf20Sopenharmony_ci } 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci if (is_via_compact_memory(cc->order)) 19958c2ecf20Sopenharmony_ci return COMPACT_CONTINUE; 19968c2ecf20Sopenharmony_ci 19978c2ecf20Sopenharmony_ci /* 19988c2ecf20Sopenharmony_ci * Always finish scanning a pageblock to reduce the possibility of 19998c2ecf20Sopenharmony_ci * fallbacks in the future. This is particularly important when 20008c2ecf20Sopenharmony_ci * migration source is unmovable/reclaimable but it's not worth 20018c2ecf20Sopenharmony_ci * special casing. 
20028c2ecf20Sopenharmony_ci */ 20038c2ecf20Sopenharmony_ci if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages)) 20048c2ecf20Sopenharmony_ci return COMPACT_CONTINUE; 20058c2ecf20Sopenharmony_ci 20068c2ecf20Sopenharmony_ci /* Direct compactor: Is a suitable page free? */ 20078c2ecf20Sopenharmony_ci ret = COMPACT_NO_SUITABLE_PAGE; 20088c2ecf20Sopenharmony_ci for (order = cc->order; order < MAX_ORDER; order++) { 20098c2ecf20Sopenharmony_ci struct free_area *area = &cc->zone->free_area[order]; 20108c2ecf20Sopenharmony_ci bool can_steal; 20118c2ecf20Sopenharmony_ci 20128c2ecf20Sopenharmony_ci /* Job done if page is free of the right migratetype */ 20138c2ecf20Sopenharmony_ci if (!free_area_empty(area, migratetype)) 20148c2ecf20Sopenharmony_ci return COMPACT_SUCCESS; 20158c2ecf20Sopenharmony_ci 20168c2ecf20Sopenharmony_ci#ifdef CONFIG_CMA 20178c2ecf20Sopenharmony_ci /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ 20188c2ecf20Sopenharmony_ci if (migratetype == get_cma_migratetype() && 20198c2ecf20Sopenharmony_ci !free_area_empty(area, MIGRATE_CMA)) 20208c2ecf20Sopenharmony_ci return COMPACT_SUCCESS; 20218c2ecf20Sopenharmony_ci#endif 20228c2ecf20Sopenharmony_ci /* 20238c2ecf20Sopenharmony_ci * Job done if allocation would steal freepages from 20248c2ecf20Sopenharmony_ci * other migratetype buddy lists. 20258c2ecf20Sopenharmony_ci */ 20268c2ecf20Sopenharmony_ci if (find_suitable_fallback(area, order, migratetype, 20278c2ecf20Sopenharmony_ci true, &can_steal) != -1) { 20288c2ecf20Sopenharmony_ci 20298c2ecf20Sopenharmony_ci /* movable pages are OK in any pageblock */ 20308c2ecf20Sopenharmony_ci if (migratetype == MIGRATE_MOVABLE) 20318c2ecf20Sopenharmony_ci return COMPACT_SUCCESS; 20328c2ecf20Sopenharmony_ci 20338c2ecf20Sopenharmony_ci /* 20348c2ecf20Sopenharmony_ci * We are stealing for a non-movable allocation. 
Make 20358c2ecf20Sopenharmony_ci * sure we finish compacting the current pageblock 20368c2ecf20Sopenharmony_ci * first so it is as free as possible and we won't 20378c2ecf20Sopenharmony_ci * have to steal another one soon. This only applies 20388c2ecf20Sopenharmony_ci * to sync compaction, as async compaction operates 20398c2ecf20Sopenharmony_ci * on pageblocks of the same migratetype. 20408c2ecf20Sopenharmony_ci */ 20418c2ecf20Sopenharmony_ci if (cc->mode == MIGRATE_ASYNC || 20428c2ecf20Sopenharmony_ci IS_ALIGNED(cc->migrate_pfn, 20438c2ecf20Sopenharmony_ci pageblock_nr_pages)) { 20448c2ecf20Sopenharmony_ci return COMPACT_SUCCESS; 20458c2ecf20Sopenharmony_ci } 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ci ret = COMPACT_CONTINUE; 20488c2ecf20Sopenharmony_ci break; 20498c2ecf20Sopenharmony_ci } 20508c2ecf20Sopenharmony_ci } 20518c2ecf20Sopenharmony_ci 20528c2ecf20Sopenharmony_ciout: 20538c2ecf20Sopenharmony_ci if (cc->contended || fatal_signal_pending(current)) 20548c2ecf20Sopenharmony_ci ret = COMPACT_CONTENDED; 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_ci return ret; 20578c2ecf20Sopenharmony_ci} 20588c2ecf20Sopenharmony_ci 20598c2ecf20Sopenharmony_cistatic enum compact_result compact_finished(struct compact_control *cc) 20608c2ecf20Sopenharmony_ci{ 20618c2ecf20Sopenharmony_ci int ret; 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ci ret = __compact_finished(cc); 20648c2ecf20Sopenharmony_ci trace_mm_compaction_finished(cc->zone, cc->order, ret); 20658c2ecf20Sopenharmony_ci if (ret == COMPACT_NO_SUITABLE_PAGE) 20668c2ecf20Sopenharmony_ci ret = COMPACT_CONTINUE; 20678c2ecf20Sopenharmony_ci 20688c2ecf20Sopenharmony_ci return ret; 20698c2ecf20Sopenharmony_ci} 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci/* 20728c2ecf20Sopenharmony_ci * compaction_suitable: Is this suitable to run compaction on this zone now? 
20738c2ecf20Sopenharmony_ci * Returns 20748c2ecf20Sopenharmony_ci * COMPACT_SKIPPED - If there are too few free pages for compaction 20758c2ecf20Sopenharmony_ci * COMPACT_SUCCESS - If the allocation would succeed without compaction 20768c2ecf20Sopenharmony_ci * COMPACT_CONTINUE - If compaction should run now 20778c2ecf20Sopenharmony_ci */ 20788c2ecf20Sopenharmony_cistatic enum compact_result __compaction_suitable(struct zone *zone, int order, 20798c2ecf20Sopenharmony_ci unsigned int alloc_flags, 20808c2ecf20Sopenharmony_ci int highest_zoneidx, 20818c2ecf20Sopenharmony_ci unsigned long wmark_target) 20828c2ecf20Sopenharmony_ci{ 20838c2ecf20Sopenharmony_ci unsigned long watermark; 20848c2ecf20Sopenharmony_ci 20858c2ecf20Sopenharmony_ci if (is_via_compact_memory(order)) 20868c2ecf20Sopenharmony_ci return COMPACT_CONTINUE; 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK); 20898c2ecf20Sopenharmony_ci /* 20908c2ecf20Sopenharmony_ci * If watermarks for high-order allocation are already met, there 20918c2ecf20Sopenharmony_ci * should be no need for compaction at all. 20928c2ecf20Sopenharmony_ci */ 20938c2ecf20Sopenharmony_ci if (zone_watermark_ok(zone, order, watermark, highest_zoneidx, 20948c2ecf20Sopenharmony_ci alloc_flags)) 20958c2ecf20Sopenharmony_ci return COMPACT_SUCCESS; 20968c2ecf20Sopenharmony_ci 20978c2ecf20Sopenharmony_ci /* 20988c2ecf20Sopenharmony_ci * Watermarks for order-0 must be met for compaction to be able to 20998c2ecf20Sopenharmony_ci * isolate free pages for migration targets. This means that the 21008c2ecf20Sopenharmony_ci * watermark and alloc_flags have to match, or be more pessimistic than 21018c2ecf20Sopenharmony_ci * the check in __isolate_free_page(). We don't use the direct 21028c2ecf20Sopenharmony_ci * compactor's alloc_flags, as they are not relevant for freepage 21038c2ecf20Sopenharmony_ci * isolation. 
We however do use the direct compactor's highest_zoneidx 21048c2ecf20Sopenharmony_ci * to skip over zones where lowmem reserves would prevent allocation 21058c2ecf20Sopenharmony_ci * even if compaction succeeds. 21068c2ecf20Sopenharmony_ci * For costly orders, we require low watermark instead of min for 21078c2ecf20Sopenharmony_ci * compaction to proceed to increase its chances. 21088c2ecf20Sopenharmony_ci * ALLOC_CMA is used, as pages in CMA pageblocks are considered 21098c2ecf20Sopenharmony_ci * suitable migration targets 21108c2ecf20Sopenharmony_ci */ 21118c2ecf20Sopenharmony_ci watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? 21128c2ecf20Sopenharmony_ci low_wmark_pages(zone) : min_wmark_pages(zone); 21138c2ecf20Sopenharmony_ci watermark += compact_gap(order); 21148c2ecf20Sopenharmony_ci if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx, 21158c2ecf20Sopenharmony_ci ALLOC_CMA, wmark_target)) 21168c2ecf20Sopenharmony_ci return COMPACT_SKIPPED; 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci return COMPACT_CONTINUE; 21198c2ecf20Sopenharmony_ci} 21208c2ecf20Sopenharmony_ci 21218c2ecf20Sopenharmony_cienum compact_result compaction_suitable(struct zone *zone, int order, 21228c2ecf20Sopenharmony_ci unsigned int alloc_flags, 21238c2ecf20Sopenharmony_ci int highest_zoneidx) 21248c2ecf20Sopenharmony_ci{ 21258c2ecf20Sopenharmony_ci enum compact_result ret; 21268c2ecf20Sopenharmony_ci int fragindex; 21278c2ecf20Sopenharmony_ci 21288c2ecf20Sopenharmony_ci ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx, 21298c2ecf20Sopenharmony_ci zone_page_state(zone, NR_FREE_PAGES)); 21308c2ecf20Sopenharmony_ci /* 21318c2ecf20Sopenharmony_ci * fragmentation index determines if allocation failures are due to 21328c2ecf20Sopenharmony_ci * low memory or external fragmentation 21338c2ecf20Sopenharmony_ci * 21348c2ecf20Sopenharmony_ci * index of -1000 would imply allocations might succeed depending on 21358c2ecf20Sopenharmony_ci * watermarks, but we 
already failed the high-order watermark check 21368c2ecf20Sopenharmony_ci * index towards 0 implies failure is due to lack of memory 21378c2ecf20Sopenharmony_ci * index towards 1000 implies failure is due to fragmentation 21388c2ecf20Sopenharmony_ci * 21398c2ecf20Sopenharmony_ci * Only compact if a failure would be due to fragmentation. Also 21408c2ecf20Sopenharmony_ci * ignore fragindex for non-costly orders where the alternative to 21418c2ecf20Sopenharmony_ci * a successful reclaim/compaction is OOM. Fragindex and the 21428c2ecf20Sopenharmony_ci * vm.extfrag_threshold sysctl is meant as a heuristic to prevent 21438c2ecf20Sopenharmony_ci * excessive compaction for costly orders, but it should not be at the 21448c2ecf20Sopenharmony_ci * expense of system stability. 21458c2ecf20Sopenharmony_ci */ 21468c2ecf20Sopenharmony_ci if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) { 21478c2ecf20Sopenharmony_ci fragindex = fragmentation_index(zone, order); 21488c2ecf20Sopenharmony_ci if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) 21498c2ecf20Sopenharmony_ci ret = COMPACT_NOT_SUITABLE_ZONE; 21508c2ecf20Sopenharmony_ci } 21518c2ecf20Sopenharmony_ci 21528c2ecf20Sopenharmony_ci trace_mm_compaction_suitable(zone, order, ret); 21538c2ecf20Sopenharmony_ci if (ret == COMPACT_NOT_SUITABLE_ZONE) 21548c2ecf20Sopenharmony_ci ret = COMPACT_SKIPPED; 21558c2ecf20Sopenharmony_ci 21568c2ecf20Sopenharmony_ci return ret; 21578c2ecf20Sopenharmony_ci} 21588c2ecf20Sopenharmony_ci 21598c2ecf20Sopenharmony_cibool compaction_zonelist_suitable(struct alloc_context *ac, int order, 21608c2ecf20Sopenharmony_ci int alloc_flags) 21618c2ecf20Sopenharmony_ci{ 21628c2ecf20Sopenharmony_ci struct zone *zone; 21638c2ecf20Sopenharmony_ci struct zoneref *z; 21648c2ecf20Sopenharmony_ci 21658c2ecf20Sopenharmony_ci /* 21668c2ecf20Sopenharmony_ci * Make sure at least one zone would pass __compaction_suitable if we continue 21678c2ecf20Sopenharmony_ci * retrying the reclaim. 
21688c2ecf20Sopenharmony_ci */ 21698c2ecf20Sopenharmony_ci for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, 21708c2ecf20Sopenharmony_ci ac->highest_zoneidx, ac->nodemask) { 21718c2ecf20Sopenharmony_ci unsigned long available; 21728c2ecf20Sopenharmony_ci enum compact_result compact_result; 21738c2ecf20Sopenharmony_ci 21748c2ecf20Sopenharmony_ci /* 21758c2ecf20Sopenharmony_ci * Do not consider all the reclaimable memory because we do not 21768c2ecf20Sopenharmony_ci * want to trash just for a single high order allocation which 21778c2ecf20Sopenharmony_ci * is even not guaranteed to appear even if __compaction_suitable 21788c2ecf20Sopenharmony_ci * is happy about the watermark check. 21798c2ecf20Sopenharmony_ci */ 21808c2ecf20Sopenharmony_ci available = zone_reclaimable_pages(zone) / order; 21818c2ecf20Sopenharmony_ci available += zone_page_state_snapshot(zone, NR_FREE_PAGES); 21828c2ecf20Sopenharmony_ci compact_result = __compaction_suitable(zone, order, alloc_flags, 21838c2ecf20Sopenharmony_ci ac->highest_zoneidx, available); 21848c2ecf20Sopenharmony_ci if (compact_result != COMPACT_SKIPPED) 21858c2ecf20Sopenharmony_ci return true; 21868c2ecf20Sopenharmony_ci } 21878c2ecf20Sopenharmony_ci 21888c2ecf20Sopenharmony_ci return false; 21898c2ecf20Sopenharmony_ci} 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_cistatic enum compact_result 21928c2ecf20Sopenharmony_cicompact_zone(struct compact_control *cc, struct capture_control *capc) 21938c2ecf20Sopenharmony_ci{ 21948c2ecf20Sopenharmony_ci enum compact_result ret; 21958c2ecf20Sopenharmony_ci unsigned long start_pfn = cc->zone->zone_start_pfn; 21968c2ecf20Sopenharmony_ci unsigned long end_pfn = zone_end_pfn(cc->zone); 21978c2ecf20Sopenharmony_ci unsigned long last_migrated_pfn; 21988c2ecf20Sopenharmony_ci const bool sync = cc->mode != MIGRATE_ASYNC; 21998c2ecf20Sopenharmony_ci bool update_cached; 22008c2ecf20Sopenharmony_ci 22018c2ecf20Sopenharmony_ci /* 22028c2ecf20Sopenharmony_ci * These counters track 
activities during zone compaction. Initialize 22038c2ecf20Sopenharmony_ci * them before compacting a new zone. 22048c2ecf20Sopenharmony_ci */ 22058c2ecf20Sopenharmony_ci cc->total_migrate_scanned = 0; 22068c2ecf20Sopenharmony_ci cc->total_free_scanned = 0; 22078c2ecf20Sopenharmony_ci cc->nr_migratepages = 0; 22088c2ecf20Sopenharmony_ci cc->nr_freepages = 0; 22098c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cc->freepages); 22108c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cc->migratepages); 22118c2ecf20Sopenharmony_ci 22128c2ecf20Sopenharmony_ci cc->migratetype = gfp_migratetype(cc->gfp_mask); 22138c2ecf20Sopenharmony_ci ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags, 22148c2ecf20Sopenharmony_ci cc->highest_zoneidx); 22158c2ecf20Sopenharmony_ci /* Compaction is likely to fail */ 22168c2ecf20Sopenharmony_ci if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED) 22178c2ecf20Sopenharmony_ci return ret; 22188c2ecf20Sopenharmony_ci 22198c2ecf20Sopenharmony_ci /* huh, compaction_suitable is returning something unexpected */ 22208c2ecf20Sopenharmony_ci VM_BUG_ON(ret != COMPACT_CONTINUE); 22218c2ecf20Sopenharmony_ci 22228c2ecf20Sopenharmony_ci /* 22238c2ecf20Sopenharmony_ci * Clear pageblock skip if there were failures recently and compaction 22248c2ecf20Sopenharmony_ci * is about to be retried after being deferred. 22258c2ecf20Sopenharmony_ci */ 22268c2ecf20Sopenharmony_ci if (compaction_restarting(cc->zone, cc->order)) 22278c2ecf20Sopenharmony_ci __reset_isolation_suitable(cc->zone); 22288c2ecf20Sopenharmony_ci 22298c2ecf20Sopenharmony_ci /* 22308c2ecf20Sopenharmony_ci * Setup to move all movable pages to the end of the zone. Used cached 22318c2ecf20Sopenharmony_ci * information on where the scanners should start (unless we explicitly 22328c2ecf20Sopenharmony_ci * want to compact the whole zone), but check that it is initialised 22338c2ecf20Sopenharmony_ci * by ensuring the values are within zone boundaries. 
22348c2ecf20Sopenharmony_ci */ 22358c2ecf20Sopenharmony_ci cc->fast_start_pfn = 0; 22368c2ecf20Sopenharmony_ci if (cc->whole_zone) { 22378c2ecf20Sopenharmony_ci cc->migrate_pfn = start_pfn; 22388c2ecf20Sopenharmony_ci cc->free_pfn = pageblock_start_pfn(end_pfn - 1); 22398c2ecf20Sopenharmony_ci } else { 22408c2ecf20Sopenharmony_ci cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync]; 22418c2ecf20Sopenharmony_ci cc->free_pfn = cc->zone->compact_cached_free_pfn; 22428c2ecf20Sopenharmony_ci if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { 22438c2ecf20Sopenharmony_ci cc->free_pfn = pageblock_start_pfn(end_pfn - 1); 22448c2ecf20Sopenharmony_ci cc->zone->compact_cached_free_pfn = cc->free_pfn; 22458c2ecf20Sopenharmony_ci } 22468c2ecf20Sopenharmony_ci if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { 22478c2ecf20Sopenharmony_ci cc->migrate_pfn = start_pfn; 22488c2ecf20Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; 22498c2ecf20Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; 22508c2ecf20Sopenharmony_ci } 22518c2ecf20Sopenharmony_ci 22528c2ecf20Sopenharmony_ci if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn) 22538c2ecf20Sopenharmony_ci cc->whole_zone = true; 22548c2ecf20Sopenharmony_ci } 22558c2ecf20Sopenharmony_ci 22568c2ecf20Sopenharmony_ci last_migrated_pfn = 0; 22578c2ecf20Sopenharmony_ci 22588c2ecf20Sopenharmony_ci /* 22598c2ecf20Sopenharmony_ci * Migrate has separate cached PFNs for ASYNC and SYNC* migration on 22608c2ecf20Sopenharmony_ci * the basis that some migrations will fail in ASYNC mode. However, 22618c2ecf20Sopenharmony_ci * if the cached PFNs match and pageblocks are skipped due to having 22628c2ecf20Sopenharmony_ci * no isolation candidates, then the sync state does not matter. 22638c2ecf20Sopenharmony_ci * Until a pageblock with isolation candidates is found, keep the 22648c2ecf20Sopenharmony_ci * cached PFNs in sync to avoid revisiting the same blocks. 
22658c2ecf20Sopenharmony_ci */ 22668c2ecf20Sopenharmony_ci update_cached = !sync && 22678c2ecf20Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1]; 22688c2ecf20Sopenharmony_ci 22698c2ecf20Sopenharmony_ci trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, 22708c2ecf20Sopenharmony_ci cc->free_pfn, end_pfn, sync); 22718c2ecf20Sopenharmony_ci 22728c2ecf20Sopenharmony_ci migrate_prep_local(); 22738c2ecf20Sopenharmony_ci 22748c2ecf20Sopenharmony_ci while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) { 22758c2ecf20Sopenharmony_ci int err; 22768c2ecf20Sopenharmony_ci unsigned long start_pfn = cc->migrate_pfn; 22778c2ecf20Sopenharmony_ci 22788c2ecf20Sopenharmony_ci /* 22798c2ecf20Sopenharmony_ci * Avoid multiple rescans which can happen if a page cannot be 22808c2ecf20Sopenharmony_ci * isolated (dirty/writeback in async mode) or if the migrated 22818c2ecf20Sopenharmony_ci * pages are being allocated before the pageblock is cleared. 22828c2ecf20Sopenharmony_ci * The first rescan will capture the entire pageblock for 22838c2ecf20Sopenharmony_ci * migration. If it fails, it'll be marked skip and scanning 22848c2ecf20Sopenharmony_ci * will proceed as normal. 
22858c2ecf20Sopenharmony_ci */ 22868c2ecf20Sopenharmony_ci cc->rescan = false; 22878c2ecf20Sopenharmony_ci if (pageblock_start_pfn(last_migrated_pfn) == 22888c2ecf20Sopenharmony_ci pageblock_start_pfn(start_pfn)) { 22898c2ecf20Sopenharmony_ci cc->rescan = true; 22908c2ecf20Sopenharmony_ci } 22918c2ecf20Sopenharmony_ci 22928c2ecf20Sopenharmony_ci switch (isolate_migratepages(cc)) { 22938c2ecf20Sopenharmony_ci case ISOLATE_ABORT: 22948c2ecf20Sopenharmony_ci ret = COMPACT_CONTENDED; 22958c2ecf20Sopenharmony_ci putback_movable_pages(&cc->migratepages); 22968c2ecf20Sopenharmony_ci cc->nr_migratepages = 0; 22978c2ecf20Sopenharmony_ci goto out; 22988c2ecf20Sopenharmony_ci case ISOLATE_NONE: 22998c2ecf20Sopenharmony_ci if (update_cached) { 23008c2ecf20Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[1] = 23018c2ecf20Sopenharmony_ci cc->zone->compact_cached_migrate_pfn[0]; 23028c2ecf20Sopenharmony_ci } 23038c2ecf20Sopenharmony_ci 23048c2ecf20Sopenharmony_ci /* 23058c2ecf20Sopenharmony_ci * We haven't isolated and migrated anything, but 23068c2ecf20Sopenharmony_ci * there might still be unflushed migrations from 23078c2ecf20Sopenharmony_ci * previous cc->order aligned block. 
23088c2ecf20Sopenharmony_ci */ 23098c2ecf20Sopenharmony_ci goto check_drain; 23108c2ecf20Sopenharmony_ci case ISOLATE_SUCCESS: 23118c2ecf20Sopenharmony_ci update_cached = false; 23128c2ecf20Sopenharmony_ci last_migrated_pfn = start_pfn; 23138c2ecf20Sopenharmony_ci ; 23148c2ecf20Sopenharmony_ci } 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci err = migrate_pages(&cc->migratepages, compaction_alloc, 23178c2ecf20Sopenharmony_ci compaction_free, (unsigned long)cc, cc->mode, 23188c2ecf20Sopenharmony_ci MR_COMPACTION); 23198c2ecf20Sopenharmony_ci 23208c2ecf20Sopenharmony_ci trace_mm_compaction_migratepages(cc->nr_migratepages, err, 23218c2ecf20Sopenharmony_ci &cc->migratepages); 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci /* All pages were either migrated or will be released */ 23248c2ecf20Sopenharmony_ci cc->nr_migratepages = 0; 23258c2ecf20Sopenharmony_ci if (err) { 23268c2ecf20Sopenharmony_ci putback_movable_pages(&cc->migratepages); 23278c2ecf20Sopenharmony_ci /* 23288c2ecf20Sopenharmony_ci * migrate_pages() may return -ENOMEM when scanners meet 23298c2ecf20Sopenharmony_ci * and we want compact_finished() to detect it 23308c2ecf20Sopenharmony_ci */ 23318c2ecf20Sopenharmony_ci if (err == -ENOMEM && !compact_scanners_met(cc)) { 23328c2ecf20Sopenharmony_ci ret = COMPACT_CONTENDED; 23338c2ecf20Sopenharmony_ci goto out; 23348c2ecf20Sopenharmony_ci } 23358c2ecf20Sopenharmony_ci /* 23368c2ecf20Sopenharmony_ci * We failed to migrate at least one page in the current 23378c2ecf20Sopenharmony_ci * order-aligned block, so skip the rest of it. 
23388c2ecf20Sopenharmony_ci */ 23398c2ecf20Sopenharmony_ci if (cc->direct_compaction && 23408c2ecf20Sopenharmony_ci (cc->mode == MIGRATE_ASYNC)) { 23418c2ecf20Sopenharmony_ci cc->migrate_pfn = block_end_pfn( 23428c2ecf20Sopenharmony_ci cc->migrate_pfn - 1, cc->order); 23438c2ecf20Sopenharmony_ci /* Draining pcplists is useless in this case */ 23448c2ecf20Sopenharmony_ci last_migrated_pfn = 0; 23458c2ecf20Sopenharmony_ci } 23468c2ecf20Sopenharmony_ci } 23478c2ecf20Sopenharmony_ci 23488c2ecf20Sopenharmony_cicheck_drain: 23498c2ecf20Sopenharmony_ci /* 23508c2ecf20Sopenharmony_ci * Has the migration scanner moved away from the previous 23518c2ecf20Sopenharmony_ci * cc->order aligned block where we migrated from? If yes, 23528c2ecf20Sopenharmony_ci * flush the pages that were freed, so that they can merge and 23538c2ecf20Sopenharmony_ci * compact_finished() can detect immediately if allocation 23548c2ecf20Sopenharmony_ci * would succeed. 23558c2ecf20Sopenharmony_ci */ 23568c2ecf20Sopenharmony_ci if (cc->order > 0 && last_migrated_pfn) { 23578c2ecf20Sopenharmony_ci unsigned long current_block_start = 23588c2ecf20Sopenharmony_ci block_start_pfn(cc->migrate_pfn, cc->order); 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_ci if (last_migrated_pfn < current_block_start) { 23618c2ecf20Sopenharmony_ci lru_add_drain_cpu_zone(cc->zone); 23628c2ecf20Sopenharmony_ci /* No more flushing until we migrate again */ 23638c2ecf20Sopenharmony_ci last_migrated_pfn = 0; 23648c2ecf20Sopenharmony_ci } 23658c2ecf20Sopenharmony_ci } 23668c2ecf20Sopenharmony_ci 23678c2ecf20Sopenharmony_ci /* Stop if a page has been captured */ 23688c2ecf20Sopenharmony_ci if (capc && capc->page) { 23698c2ecf20Sopenharmony_ci ret = COMPACT_SUCCESS; 23708c2ecf20Sopenharmony_ci break; 23718c2ecf20Sopenharmony_ci } 23728c2ecf20Sopenharmony_ci } 23738c2ecf20Sopenharmony_ci 23748c2ecf20Sopenharmony_ciout: 23758c2ecf20Sopenharmony_ci /* 23768c2ecf20Sopenharmony_ci * Release free pages and update where the free 
scanner should restart, 23778c2ecf20Sopenharmony_ci * so we don't leave any returned pages behind in the next attempt. 23788c2ecf20Sopenharmony_ci */ 23798c2ecf20Sopenharmony_ci if (cc->nr_freepages > 0) { 23808c2ecf20Sopenharmony_ci unsigned long free_pfn = release_freepages(&cc->freepages); 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci cc->nr_freepages = 0; 23838c2ecf20Sopenharmony_ci VM_BUG_ON(free_pfn == 0); 23848c2ecf20Sopenharmony_ci /* The cached pfn is always the first in a pageblock */ 23858c2ecf20Sopenharmony_ci free_pfn = pageblock_start_pfn(free_pfn); 23868c2ecf20Sopenharmony_ci /* 23878c2ecf20Sopenharmony_ci * Only go back, not forward. The cached pfn might have been 23888c2ecf20Sopenharmony_ci * already reset to zone end in compact_finished() 23898c2ecf20Sopenharmony_ci */ 23908c2ecf20Sopenharmony_ci if (free_pfn > cc->zone->compact_cached_free_pfn) 23918c2ecf20Sopenharmony_ci cc->zone->compact_cached_free_pfn = free_pfn; 23928c2ecf20Sopenharmony_ci } 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned); 23958c2ecf20Sopenharmony_ci count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned); 23968c2ecf20Sopenharmony_ci 23978c2ecf20Sopenharmony_ci trace_mm_compaction_end(start_pfn, cc->migrate_pfn, 23988c2ecf20Sopenharmony_ci cc->free_pfn, end_pfn, sync, ret); 23998c2ecf20Sopenharmony_ci 24008c2ecf20Sopenharmony_ci return ret; 24018c2ecf20Sopenharmony_ci} 24028c2ecf20Sopenharmony_ci 24038c2ecf20Sopenharmony_cistatic enum compact_result compact_zone_order(struct zone *zone, int order, 24048c2ecf20Sopenharmony_ci gfp_t gfp_mask, enum compact_priority prio, 24058c2ecf20Sopenharmony_ci unsigned int alloc_flags, int highest_zoneidx, 24068c2ecf20Sopenharmony_ci struct page **capture) 24078c2ecf20Sopenharmony_ci{ 24088c2ecf20Sopenharmony_ci enum compact_result ret; 24098c2ecf20Sopenharmony_ci struct compact_control cc = { 24108c2ecf20Sopenharmony_ci .order = order, 
24118c2ecf20Sopenharmony_ci .search_order = order, 24128c2ecf20Sopenharmony_ci .gfp_mask = gfp_mask, 24138c2ecf20Sopenharmony_ci .zone = zone, 24148c2ecf20Sopenharmony_ci .mode = (prio == COMPACT_PRIO_ASYNC) ? 24158c2ecf20Sopenharmony_ci MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT, 24168c2ecf20Sopenharmony_ci .alloc_flags = alloc_flags, 24178c2ecf20Sopenharmony_ci .highest_zoneidx = highest_zoneidx, 24188c2ecf20Sopenharmony_ci .direct_compaction = true, 24198c2ecf20Sopenharmony_ci .whole_zone = (prio == MIN_COMPACT_PRIORITY), 24208c2ecf20Sopenharmony_ci .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY), 24218c2ecf20Sopenharmony_ci .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY) 24228c2ecf20Sopenharmony_ci }; 24238c2ecf20Sopenharmony_ci struct capture_control capc = { 24248c2ecf20Sopenharmony_ci .cc = &cc, 24258c2ecf20Sopenharmony_ci .page = NULL, 24268c2ecf20Sopenharmony_ci }; 24278c2ecf20Sopenharmony_ci 24288c2ecf20Sopenharmony_ci /* 24298c2ecf20Sopenharmony_ci * Make sure the structs are really initialized before we expose the 24308c2ecf20Sopenharmony_ci * capture control, in case we are interrupted and the interrupt handler 24318c2ecf20Sopenharmony_ci * frees a page. 24328c2ecf20Sopenharmony_ci */ 24338c2ecf20Sopenharmony_ci barrier(); 24348c2ecf20Sopenharmony_ci WRITE_ONCE(current->capture_control, &capc); 24358c2ecf20Sopenharmony_ci 24368c2ecf20Sopenharmony_ci ret = compact_zone(&cc, &capc); 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.freepages)); 24398c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.migratepages)); 24408c2ecf20Sopenharmony_ci 24418c2ecf20Sopenharmony_ci /* 24428c2ecf20Sopenharmony_ci * Make sure we hide capture control first before we read the captured 24438c2ecf20Sopenharmony_ci * page pointer, otherwise an interrupt could free and capture a page 24448c2ecf20Sopenharmony_ci * and we would leak it. 
24458c2ecf20Sopenharmony_ci */ 24468c2ecf20Sopenharmony_ci WRITE_ONCE(current->capture_control, NULL); 24478c2ecf20Sopenharmony_ci *capture = READ_ONCE(capc.page); 24488c2ecf20Sopenharmony_ci 24498c2ecf20Sopenharmony_ci return ret; 24508c2ecf20Sopenharmony_ci} 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ciint sysctl_extfrag_threshold = 500; 24538c2ecf20Sopenharmony_ci 24548c2ecf20Sopenharmony_ci/** 24558c2ecf20Sopenharmony_ci * try_to_compact_pages - Direct compact to satisfy a high-order allocation 24568c2ecf20Sopenharmony_ci * @gfp_mask: The GFP mask of the current allocation 24578c2ecf20Sopenharmony_ci * @order: The order of the current allocation 24588c2ecf20Sopenharmony_ci * @alloc_flags: The allocation flags of the current allocation 24598c2ecf20Sopenharmony_ci * @ac: The context of current allocation 24608c2ecf20Sopenharmony_ci * @prio: Determines how hard direct compaction should try to succeed 24618c2ecf20Sopenharmony_ci * @capture: Pointer to free page created by compaction will be stored here 24628c2ecf20Sopenharmony_ci * 24638c2ecf20Sopenharmony_ci * This is the main entry point for direct page compaction. 
24648c2ecf20Sopenharmony_ci */ 24658c2ecf20Sopenharmony_cienum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, 24668c2ecf20Sopenharmony_ci unsigned int alloc_flags, const struct alloc_context *ac, 24678c2ecf20Sopenharmony_ci enum compact_priority prio, struct page **capture) 24688c2ecf20Sopenharmony_ci{ 24698c2ecf20Sopenharmony_ci int may_perform_io = gfp_mask & __GFP_IO; 24708c2ecf20Sopenharmony_ci struct zoneref *z; 24718c2ecf20Sopenharmony_ci struct zone *zone; 24728c2ecf20Sopenharmony_ci enum compact_result rc = COMPACT_SKIPPED; 24738c2ecf20Sopenharmony_ci 24748c2ecf20Sopenharmony_ci /* 24758c2ecf20Sopenharmony_ci * Check if the GFP flags allow compaction - GFP_NOIO is really 24768c2ecf20Sopenharmony_ci * tricky context because the migration might require IO 24778c2ecf20Sopenharmony_ci */ 24788c2ecf20Sopenharmony_ci if (!may_perform_io) 24798c2ecf20Sopenharmony_ci return COMPACT_SKIPPED; 24808c2ecf20Sopenharmony_ci 24818c2ecf20Sopenharmony_ci trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); 24828c2ecf20Sopenharmony_ci 24838c2ecf20Sopenharmony_ci /* Compact each zone in the list */ 24848c2ecf20Sopenharmony_ci for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, 24858c2ecf20Sopenharmony_ci ac->highest_zoneidx, ac->nodemask) { 24868c2ecf20Sopenharmony_ci enum compact_result status; 24878c2ecf20Sopenharmony_ci 24888c2ecf20Sopenharmony_ci if (prio > MIN_COMPACT_PRIORITY 24898c2ecf20Sopenharmony_ci && compaction_deferred(zone, order)) { 24908c2ecf20Sopenharmony_ci rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); 24918c2ecf20Sopenharmony_ci continue; 24928c2ecf20Sopenharmony_ci } 24938c2ecf20Sopenharmony_ci 24948c2ecf20Sopenharmony_ci status = compact_zone_order(zone, order, gfp_mask, prio, 24958c2ecf20Sopenharmony_ci alloc_flags, ac->highest_zoneidx, capture); 24968c2ecf20Sopenharmony_ci rc = max(status, rc); 24978c2ecf20Sopenharmony_ci 24988c2ecf20Sopenharmony_ci /* The allocation should succeed, stop compacting */ 
24998c2ecf20Sopenharmony_ci if (status == COMPACT_SUCCESS) { 25008c2ecf20Sopenharmony_ci /* 25018c2ecf20Sopenharmony_ci * We think the allocation will succeed in this zone, 25028c2ecf20Sopenharmony_ci * but it is not certain, hence the false. The caller 25038c2ecf20Sopenharmony_ci * will repeat this with true if allocation indeed 25048c2ecf20Sopenharmony_ci * succeeds in this zone. 25058c2ecf20Sopenharmony_ci */ 25068c2ecf20Sopenharmony_ci compaction_defer_reset(zone, order, false); 25078c2ecf20Sopenharmony_ci 25088c2ecf20Sopenharmony_ci break; 25098c2ecf20Sopenharmony_ci } 25108c2ecf20Sopenharmony_ci 25118c2ecf20Sopenharmony_ci if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE || 25128c2ecf20Sopenharmony_ci status == COMPACT_PARTIAL_SKIPPED)) 25138c2ecf20Sopenharmony_ci /* 25148c2ecf20Sopenharmony_ci * We think that allocation won't succeed in this zone 25158c2ecf20Sopenharmony_ci * so we defer compaction there. If it ends up 25168c2ecf20Sopenharmony_ci * succeeding after all, it will be reset. 25178c2ecf20Sopenharmony_ci */ 25188c2ecf20Sopenharmony_ci defer_compaction(zone, order); 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ci /* 25218c2ecf20Sopenharmony_ci * We might have stopped compacting due to need_resched() in 25228c2ecf20Sopenharmony_ci * async compaction, or due to a fatal signal detected. 
In that 25238c2ecf20Sopenharmony_ci * case do not try further zones 25248c2ecf20Sopenharmony_ci */ 25258c2ecf20Sopenharmony_ci if ((prio == COMPACT_PRIO_ASYNC && need_resched()) 25268c2ecf20Sopenharmony_ci || fatal_signal_pending(current)) 25278c2ecf20Sopenharmony_ci break; 25288c2ecf20Sopenharmony_ci } 25298c2ecf20Sopenharmony_ci 25308c2ecf20Sopenharmony_ci return rc; 25318c2ecf20Sopenharmony_ci} 25328c2ecf20Sopenharmony_ci 25338c2ecf20Sopenharmony_ci/* 25348c2ecf20Sopenharmony_ci * Compact all zones within a node till each zone's fragmentation score 25358c2ecf20Sopenharmony_ci * reaches within proactive compaction thresholds (as determined by the 25368c2ecf20Sopenharmony_ci * proactiveness tunable). 25378c2ecf20Sopenharmony_ci * 25388c2ecf20Sopenharmony_ci * It is possible that the function returns before reaching score targets 25398c2ecf20Sopenharmony_ci * due to various back-off conditions, such as, contention on per-node or 25408c2ecf20Sopenharmony_ci * per-zone locks. 25418c2ecf20Sopenharmony_ci */ 25428c2ecf20Sopenharmony_cistatic void proactive_compact_node(pg_data_t *pgdat) 25438c2ecf20Sopenharmony_ci{ 25448c2ecf20Sopenharmony_ci int zoneid; 25458c2ecf20Sopenharmony_ci struct zone *zone; 25468c2ecf20Sopenharmony_ci struct compact_control cc = { 25478c2ecf20Sopenharmony_ci .order = -1, 25488c2ecf20Sopenharmony_ci .mode = MIGRATE_SYNC_LIGHT, 25498c2ecf20Sopenharmony_ci .ignore_skip_hint = true, 25508c2ecf20Sopenharmony_ci .whole_zone = true, 25518c2ecf20Sopenharmony_ci .gfp_mask = GFP_KERNEL, 25528c2ecf20Sopenharmony_ci .proactive_compaction = true, 25538c2ecf20Sopenharmony_ci }; 25548c2ecf20Sopenharmony_ci 25558c2ecf20Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 25568c2ecf20Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 25578c2ecf20Sopenharmony_ci if (!populated_zone(zone)) 25588c2ecf20Sopenharmony_ci continue; 25598c2ecf20Sopenharmony_ci 25608c2ecf20Sopenharmony_ci cc.zone = zone; 25618c2ecf20Sopenharmony_ci 
25628c2ecf20Sopenharmony_ci compact_zone(&cc, NULL); 25638c2ecf20Sopenharmony_ci 25648c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.freepages)); 25658c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.migratepages)); 25668c2ecf20Sopenharmony_ci } 25678c2ecf20Sopenharmony_ci} 25688c2ecf20Sopenharmony_ci 25698c2ecf20Sopenharmony_ci/* Compact all zones within a node */ 25708c2ecf20Sopenharmony_cistatic void compact_node(int nid) 25718c2ecf20Sopenharmony_ci{ 25728c2ecf20Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 25738c2ecf20Sopenharmony_ci int zoneid; 25748c2ecf20Sopenharmony_ci struct zone *zone; 25758c2ecf20Sopenharmony_ci struct compact_control cc = { 25768c2ecf20Sopenharmony_ci .order = -1, 25778c2ecf20Sopenharmony_ci .mode = MIGRATE_SYNC, 25788c2ecf20Sopenharmony_ci .ignore_skip_hint = true, 25798c2ecf20Sopenharmony_ci .whole_zone = true, 25808c2ecf20Sopenharmony_ci .gfp_mask = GFP_KERNEL, 25818c2ecf20Sopenharmony_ci }; 25828c2ecf20Sopenharmony_ci 25838c2ecf20Sopenharmony_ci 25848c2ecf20Sopenharmony_ci for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { 25858c2ecf20Sopenharmony_ci 25868c2ecf20Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 25878c2ecf20Sopenharmony_ci if (!populated_zone(zone)) 25888c2ecf20Sopenharmony_ci continue; 25898c2ecf20Sopenharmony_ci 25908c2ecf20Sopenharmony_ci cc.zone = zone; 25918c2ecf20Sopenharmony_ci 25928c2ecf20Sopenharmony_ci compact_zone(&cc, NULL); 25938c2ecf20Sopenharmony_ci 25948c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.freepages)); 25958c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.migratepages)); 25968c2ecf20Sopenharmony_ci } 25978c2ecf20Sopenharmony_ci} 25988c2ecf20Sopenharmony_ci 25998c2ecf20Sopenharmony_ci/* Compact all nodes in the system */ 26008c2ecf20Sopenharmony_cistatic void compact_nodes(void) 26018c2ecf20Sopenharmony_ci{ 26028c2ecf20Sopenharmony_ci int nid; 26038c2ecf20Sopenharmony_ci 26048c2ecf20Sopenharmony_ci /* Flush pending updates to the LRU lists */ 26058c2ecf20Sopenharmony_ci 
lru_add_drain_all(); 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci for_each_online_node(nid) 26088c2ecf20Sopenharmony_ci compact_node(nid); 26098c2ecf20Sopenharmony_ci} 26108c2ecf20Sopenharmony_ci 26118c2ecf20Sopenharmony_ci/* The written value is actually unused, all memory is compacted */ 26128c2ecf20Sopenharmony_ciint sysctl_compact_memory; 26138c2ecf20Sopenharmony_ci 26148c2ecf20Sopenharmony_ci/* 26158c2ecf20Sopenharmony_ci * Tunable for proactive compaction. It determines how 26168c2ecf20Sopenharmony_ci * aggressively the kernel should compact memory in the 26178c2ecf20Sopenharmony_ci * background. It takes values in the range [0, 100]. 26188c2ecf20Sopenharmony_ci */ 26198c2ecf20Sopenharmony_ciunsigned int __read_mostly sysctl_compaction_proactiveness = 20; 26208c2ecf20Sopenharmony_ci 26218c2ecf20Sopenharmony_ci/* 26228c2ecf20Sopenharmony_ci * This is the entry point for compacting all nodes via 26238c2ecf20Sopenharmony_ci * /proc/sys/vm/compact_memory 26248c2ecf20Sopenharmony_ci */ 26258c2ecf20Sopenharmony_ciint sysctl_compaction_handler(struct ctl_table *table, int write, 26268c2ecf20Sopenharmony_ci void *buffer, size_t *length, loff_t *ppos) 26278c2ecf20Sopenharmony_ci{ 26288c2ecf20Sopenharmony_ci if (write) 26298c2ecf20Sopenharmony_ci compact_nodes(); 26308c2ecf20Sopenharmony_ci 26318c2ecf20Sopenharmony_ci return 0; 26328c2ecf20Sopenharmony_ci} 26338c2ecf20Sopenharmony_ci 26348c2ecf20Sopenharmony_ci#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) 26358c2ecf20Sopenharmony_cistatic ssize_t sysfs_compact_node(struct device *dev, 26368c2ecf20Sopenharmony_ci struct device_attribute *attr, 26378c2ecf20Sopenharmony_ci const char *buf, size_t count) 26388c2ecf20Sopenharmony_ci{ 26398c2ecf20Sopenharmony_ci int nid = dev->id; 26408c2ecf20Sopenharmony_ci 26418c2ecf20Sopenharmony_ci if (nid >= 0 && nid < nr_node_ids && node_online(nid)) { 26428c2ecf20Sopenharmony_ci /* Flush pending updates to the LRU lists */ 26438c2ecf20Sopenharmony_ci 
lru_add_drain_all(); 26448c2ecf20Sopenharmony_ci 26458c2ecf20Sopenharmony_ci compact_node(nid); 26468c2ecf20Sopenharmony_ci } 26478c2ecf20Sopenharmony_ci 26488c2ecf20Sopenharmony_ci return count; 26498c2ecf20Sopenharmony_ci} 26508c2ecf20Sopenharmony_cistatic DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node); 26518c2ecf20Sopenharmony_ci 26528c2ecf20Sopenharmony_ciint compaction_register_node(struct node *node) 26538c2ecf20Sopenharmony_ci{ 26548c2ecf20Sopenharmony_ci return device_create_file(&node->dev, &dev_attr_compact); 26558c2ecf20Sopenharmony_ci} 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_civoid compaction_unregister_node(struct node *node) 26588c2ecf20Sopenharmony_ci{ 26598c2ecf20Sopenharmony_ci return device_remove_file(&node->dev, &dev_attr_compact); 26608c2ecf20Sopenharmony_ci} 26618c2ecf20Sopenharmony_ci#endif /* CONFIG_SYSFS && CONFIG_NUMA */ 26628c2ecf20Sopenharmony_ci 26638c2ecf20Sopenharmony_cistatic inline bool kcompactd_work_requested(pg_data_t *pgdat) 26648c2ecf20Sopenharmony_ci{ 26658c2ecf20Sopenharmony_ci return pgdat->kcompactd_max_order > 0 || kthread_should_stop(); 26668c2ecf20Sopenharmony_ci} 26678c2ecf20Sopenharmony_ci 26688c2ecf20Sopenharmony_cistatic bool kcompactd_node_suitable(pg_data_t *pgdat) 26698c2ecf20Sopenharmony_ci{ 26708c2ecf20Sopenharmony_ci int zoneid; 26718c2ecf20Sopenharmony_ci struct zone *zone; 26728c2ecf20Sopenharmony_ci enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx; 26738c2ecf20Sopenharmony_ci 26748c2ecf20Sopenharmony_ci for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) { 26758c2ecf20Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 26768c2ecf20Sopenharmony_ci 26778c2ecf20Sopenharmony_ci if (!populated_zone(zone)) 26788c2ecf20Sopenharmony_ci continue; 26798c2ecf20Sopenharmony_ci 26808c2ecf20Sopenharmony_ci if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0, 26818c2ecf20Sopenharmony_ci highest_zoneidx) == COMPACT_CONTINUE) 26828c2ecf20Sopenharmony_ci return true; 
26838c2ecf20Sopenharmony_ci } 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci return false; 26868c2ecf20Sopenharmony_ci} 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_cistatic void kcompactd_do_work(pg_data_t *pgdat) 26898c2ecf20Sopenharmony_ci{ 26908c2ecf20Sopenharmony_ci /* 26918c2ecf20Sopenharmony_ci * With no special task, compact all zones so that a page of requested 26928c2ecf20Sopenharmony_ci * order is allocatable. 26938c2ecf20Sopenharmony_ci */ 26948c2ecf20Sopenharmony_ci int zoneid; 26958c2ecf20Sopenharmony_ci struct zone *zone; 26968c2ecf20Sopenharmony_ci struct compact_control cc = { 26978c2ecf20Sopenharmony_ci .order = pgdat->kcompactd_max_order, 26988c2ecf20Sopenharmony_ci .search_order = pgdat->kcompactd_max_order, 26998c2ecf20Sopenharmony_ci .highest_zoneidx = pgdat->kcompactd_highest_zoneidx, 27008c2ecf20Sopenharmony_ci .mode = MIGRATE_SYNC_LIGHT, 27018c2ecf20Sopenharmony_ci .ignore_skip_hint = false, 27028c2ecf20Sopenharmony_ci .gfp_mask = GFP_KERNEL, 27038c2ecf20Sopenharmony_ci }; 27048c2ecf20Sopenharmony_ci trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, 27058c2ecf20Sopenharmony_ci cc.highest_zoneidx); 27068c2ecf20Sopenharmony_ci count_compact_event(KCOMPACTD_WAKE); 27078c2ecf20Sopenharmony_ci 27088c2ecf20Sopenharmony_ci for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) { 27098c2ecf20Sopenharmony_ci int status; 27108c2ecf20Sopenharmony_ci 27118c2ecf20Sopenharmony_ci zone = &pgdat->node_zones[zoneid]; 27128c2ecf20Sopenharmony_ci if (!populated_zone(zone)) 27138c2ecf20Sopenharmony_ci continue; 27148c2ecf20Sopenharmony_ci 27158c2ecf20Sopenharmony_ci if (compaction_deferred(zone, cc.order)) 27168c2ecf20Sopenharmony_ci continue; 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci if (compaction_suitable(zone, cc.order, 0, zoneid) != 27198c2ecf20Sopenharmony_ci COMPACT_CONTINUE) 27208c2ecf20Sopenharmony_ci continue; 27218c2ecf20Sopenharmony_ci 27228c2ecf20Sopenharmony_ci if (kthread_should_stop()) 
27238c2ecf20Sopenharmony_ci return; 27248c2ecf20Sopenharmony_ci 27258c2ecf20Sopenharmony_ci cc.zone = zone; 27268c2ecf20Sopenharmony_ci status = compact_zone(&cc, NULL); 27278c2ecf20Sopenharmony_ci 27288c2ecf20Sopenharmony_ci if (status == COMPACT_SUCCESS) { 27298c2ecf20Sopenharmony_ci compaction_defer_reset(zone, cc.order, false); 27308c2ecf20Sopenharmony_ci } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { 27318c2ecf20Sopenharmony_ci /* 27328c2ecf20Sopenharmony_ci * Buddy pages may become stranded on pcps that could 27338c2ecf20Sopenharmony_ci * otherwise coalesce on the zone's free area for 27348c2ecf20Sopenharmony_ci * order >= cc.order. This is ratelimited by the 27358c2ecf20Sopenharmony_ci * upcoming deferral. 27368c2ecf20Sopenharmony_ci */ 27378c2ecf20Sopenharmony_ci drain_all_pages(zone); 27388c2ecf20Sopenharmony_ci 27398c2ecf20Sopenharmony_ci /* 27408c2ecf20Sopenharmony_ci * We use sync migration mode here, so we defer like 27418c2ecf20Sopenharmony_ci * sync direct compaction does. 27428c2ecf20Sopenharmony_ci */ 27438c2ecf20Sopenharmony_ci defer_compaction(zone, cc.order); 27448c2ecf20Sopenharmony_ci } 27458c2ecf20Sopenharmony_ci 27468c2ecf20Sopenharmony_ci count_compact_events(KCOMPACTD_MIGRATE_SCANNED, 27478c2ecf20Sopenharmony_ci cc.total_migrate_scanned); 27488c2ecf20Sopenharmony_ci count_compact_events(KCOMPACTD_FREE_SCANNED, 27498c2ecf20Sopenharmony_ci cc.total_free_scanned); 27508c2ecf20Sopenharmony_ci 27518c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.freepages)); 27528c2ecf20Sopenharmony_ci VM_BUG_ON(!list_empty(&cc.migratepages)); 27538c2ecf20Sopenharmony_ci } 27548c2ecf20Sopenharmony_ci 27558c2ecf20Sopenharmony_ci /* 27568c2ecf20Sopenharmony_ci * Regardless of success, we are done until woken up next. 
But remember 27578c2ecf20Sopenharmony_ci * the requested order/highest_zoneidx in case it was higher/tighter 27588c2ecf20Sopenharmony_ci * than our current ones 27598c2ecf20Sopenharmony_ci */ 27608c2ecf20Sopenharmony_ci if (pgdat->kcompactd_max_order <= cc.order) 27618c2ecf20Sopenharmony_ci pgdat->kcompactd_max_order = 0; 27628c2ecf20Sopenharmony_ci if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx) 27638c2ecf20Sopenharmony_ci pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1; 27648c2ecf20Sopenharmony_ci} 27658c2ecf20Sopenharmony_ci 27668c2ecf20Sopenharmony_civoid wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx) 27678c2ecf20Sopenharmony_ci{ 27688c2ecf20Sopenharmony_ci if (!order) 27698c2ecf20Sopenharmony_ci return; 27708c2ecf20Sopenharmony_ci 27718c2ecf20Sopenharmony_ci if (pgdat->kcompactd_max_order < order) 27728c2ecf20Sopenharmony_ci pgdat->kcompactd_max_order = order; 27738c2ecf20Sopenharmony_ci 27748c2ecf20Sopenharmony_ci if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx) 27758c2ecf20Sopenharmony_ci pgdat->kcompactd_highest_zoneidx = highest_zoneidx; 27768c2ecf20Sopenharmony_ci 27778c2ecf20Sopenharmony_ci /* 27788c2ecf20Sopenharmony_ci * Pairs with implicit barrier in wait_event_freezable() 27798c2ecf20Sopenharmony_ci * such that wakeups are not missed. 
27808c2ecf20Sopenharmony_ci */ 27818c2ecf20Sopenharmony_ci if (!wq_has_sleeper(&pgdat->kcompactd_wait)) 27828c2ecf20Sopenharmony_ci return; 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci if (!kcompactd_node_suitable(pgdat)) 27858c2ecf20Sopenharmony_ci return; 27868c2ecf20Sopenharmony_ci 27878c2ecf20Sopenharmony_ci trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order, 27888c2ecf20Sopenharmony_ci highest_zoneidx); 27898c2ecf20Sopenharmony_ci wake_up_interruptible(&pgdat->kcompactd_wait); 27908c2ecf20Sopenharmony_ci} 27918c2ecf20Sopenharmony_ci 27928c2ecf20Sopenharmony_ci/* 27938c2ecf20Sopenharmony_ci * The background compaction daemon, started as a kernel thread 27948c2ecf20Sopenharmony_ci * from the init process. 27958c2ecf20Sopenharmony_ci */ 27968c2ecf20Sopenharmony_cistatic int kcompactd(void *p) 27978c2ecf20Sopenharmony_ci{ 27988c2ecf20Sopenharmony_ci pg_data_t *pgdat = (pg_data_t*)p; 27998c2ecf20Sopenharmony_ci struct task_struct *tsk = current; 28008c2ecf20Sopenharmony_ci unsigned int proactive_defer = 0; 28018c2ecf20Sopenharmony_ci 28028c2ecf20Sopenharmony_ci const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); 28038c2ecf20Sopenharmony_ci 28048c2ecf20Sopenharmony_ci if (!cpumask_empty(cpumask)) 28058c2ecf20Sopenharmony_ci set_cpus_allowed_ptr(tsk, cpumask); 28068c2ecf20Sopenharmony_ci 28078c2ecf20Sopenharmony_ci set_freezable(); 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_ci pgdat->kcompactd_max_order = 0; 28108c2ecf20Sopenharmony_ci pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1; 28118c2ecf20Sopenharmony_ci 28128c2ecf20Sopenharmony_ci while (!kthread_should_stop()) { 28138c2ecf20Sopenharmony_ci unsigned long pflags; 28148c2ecf20Sopenharmony_ci 28158c2ecf20Sopenharmony_ci trace_mm_compaction_kcompactd_sleep(pgdat->node_id); 28168c2ecf20Sopenharmony_ci if (wait_event_freezable_timeout(pgdat->kcompactd_wait, 28178c2ecf20Sopenharmony_ci kcompactd_work_requested(pgdat), 28188c2ecf20Sopenharmony_ci 
msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) { 28198c2ecf20Sopenharmony_ci 28208c2ecf20Sopenharmony_ci psi_memstall_enter(&pflags); 28218c2ecf20Sopenharmony_ci kcompactd_do_work(pgdat); 28228c2ecf20Sopenharmony_ci psi_memstall_leave(&pflags); 28238c2ecf20Sopenharmony_ci continue; 28248c2ecf20Sopenharmony_ci } 28258c2ecf20Sopenharmony_ci 28268c2ecf20Sopenharmony_ci /* kcompactd wait timeout */ 28278c2ecf20Sopenharmony_ci if (should_proactive_compact_node(pgdat)) { 28288c2ecf20Sopenharmony_ci unsigned int prev_score, score; 28298c2ecf20Sopenharmony_ci 28308c2ecf20Sopenharmony_ci if (proactive_defer) { 28318c2ecf20Sopenharmony_ci proactive_defer--; 28328c2ecf20Sopenharmony_ci continue; 28338c2ecf20Sopenharmony_ci } 28348c2ecf20Sopenharmony_ci prev_score = fragmentation_score_node(pgdat); 28358c2ecf20Sopenharmony_ci proactive_compact_node(pgdat); 28368c2ecf20Sopenharmony_ci score = fragmentation_score_node(pgdat); 28378c2ecf20Sopenharmony_ci /* 28388c2ecf20Sopenharmony_ci * Defer proactive compaction if the fragmentation 28398c2ecf20Sopenharmony_ci * score did not go down i.e. no progress made. 28408c2ecf20Sopenharmony_ci */ 28418c2ecf20Sopenharmony_ci proactive_defer = score < prev_score ? 28428c2ecf20Sopenharmony_ci 0 : 1 << COMPACT_MAX_DEFER_SHIFT; 28438c2ecf20Sopenharmony_ci } 28448c2ecf20Sopenharmony_ci } 28458c2ecf20Sopenharmony_ci 28468c2ecf20Sopenharmony_ci return 0; 28478c2ecf20Sopenharmony_ci} 28488c2ecf20Sopenharmony_ci 28498c2ecf20Sopenharmony_ci/* 28508c2ecf20Sopenharmony_ci * This kcompactd start function will be called by init and node-hot-add. 28518c2ecf20Sopenharmony_ci * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added. 
28528c2ecf20Sopenharmony_ci */ 28538c2ecf20Sopenharmony_ciint kcompactd_run(int nid) 28548c2ecf20Sopenharmony_ci{ 28558c2ecf20Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 28568c2ecf20Sopenharmony_ci int ret = 0; 28578c2ecf20Sopenharmony_ci 28588c2ecf20Sopenharmony_ci if (pgdat->kcompactd) 28598c2ecf20Sopenharmony_ci return 0; 28608c2ecf20Sopenharmony_ci 28618c2ecf20Sopenharmony_ci pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid); 28628c2ecf20Sopenharmony_ci if (IS_ERR(pgdat->kcompactd)) { 28638c2ecf20Sopenharmony_ci pr_err("Failed to start kcompactd on node %d\n", nid); 28648c2ecf20Sopenharmony_ci ret = PTR_ERR(pgdat->kcompactd); 28658c2ecf20Sopenharmony_ci pgdat->kcompactd = NULL; 28668c2ecf20Sopenharmony_ci } 28678c2ecf20Sopenharmony_ci return ret; 28688c2ecf20Sopenharmony_ci} 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci/* 28718c2ecf20Sopenharmony_ci * Called by memory hotplug when all memory in a node is offlined. Caller must 28728c2ecf20Sopenharmony_ci * hold mem_hotplug_begin/end(). 28738c2ecf20Sopenharmony_ci */ 28748c2ecf20Sopenharmony_civoid kcompactd_stop(int nid) 28758c2ecf20Sopenharmony_ci{ 28768c2ecf20Sopenharmony_ci struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd; 28778c2ecf20Sopenharmony_ci 28788c2ecf20Sopenharmony_ci if (kcompactd) { 28798c2ecf20Sopenharmony_ci kthread_stop(kcompactd); 28808c2ecf20Sopenharmony_ci NODE_DATA(nid)->kcompactd = NULL; 28818c2ecf20Sopenharmony_ci } 28828c2ecf20Sopenharmony_ci} 28838c2ecf20Sopenharmony_ci 28848c2ecf20Sopenharmony_ci/* 28858c2ecf20Sopenharmony_ci * It's optimal to keep kcompactd on the same CPUs as their memory, but 28868c2ecf20Sopenharmony_ci * not required for correctness. So if the last cpu in a node goes 28878c2ecf20Sopenharmony_ci * away, we get changed to run anywhere: as the first one comes back, 28888c2ecf20Sopenharmony_ci * restore their cpu bindings. 
28898c2ecf20Sopenharmony_ci */ 28908c2ecf20Sopenharmony_cistatic int kcompactd_cpu_online(unsigned int cpu) 28918c2ecf20Sopenharmony_ci{ 28928c2ecf20Sopenharmony_ci int nid; 28938c2ecf20Sopenharmony_ci 28948c2ecf20Sopenharmony_ci for_each_node_state(nid, N_MEMORY) { 28958c2ecf20Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 28968c2ecf20Sopenharmony_ci const struct cpumask *mask; 28978c2ecf20Sopenharmony_ci 28988c2ecf20Sopenharmony_ci mask = cpumask_of_node(pgdat->node_id); 28998c2ecf20Sopenharmony_ci 29008c2ecf20Sopenharmony_ci if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) 29018c2ecf20Sopenharmony_ci /* One of our CPUs online: restore mask */ 29028c2ecf20Sopenharmony_ci set_cpus_allowed_ptr(pgdat->kcompactd, mask); 29038c2ecf20Sopenharmony_ci } 29048c2ecf20Sopenharmony_ci return 0; 29058c2ecf20Sopenharmony_ci} 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_cistatic int __init kcompactd_init(void) 29088c2ecf20Sopenharmony_ci{ 29098c2ecf20Sopenharmony_ci int nid; 29108c2ecf20Sopenharmony_ci int ret; 29118c2ecf20Sopenharmony_ci 29128c2ecf20Sopenharmony_ci ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, 29138c2ecf20Sopenharmony_ci "mm/compaction:online", 29148c2ecf20Sopenharmony_ci kcompactd_cpu_online, NULL); 29158c2ecf20Sopenharmony_ci if (ret < 0) { 29168c2ecf20Sopenharmony_ci pr_err("kcompactd: failed to register hotplug callbacks.\n"); 29178c2ecf20Sopenharmony_ci return ret; 29188c2ecf20Sopenharmony_ci } 29198c2ecf20Sopenharmony_ci 29208c2ecf20Sopenharmony_ci for_each_node_state(nid, N_MEMORY) 29218c2ecf20Sopenharmony_ci kcompactd_run(nid); 29228c2ecf20Sopenharmony_ci return 0; 29238c2ecf20Sopenharmony_ci} 29248c2ecf20Sopenharmony_cisubsys_initcall(kcompactd_init) 29258c2ecf20Sopenharmony_ci 29268c2ecf20Sopenharmony_ci#endif /* CONFIG_COMPACTION */ 2927