// SPDX-License-Identifier: GPL-2.0
/*
 * mm/memcg_reclaim.c
 *
 * Hyperhold-aware node reclaim: computes the anon/file scan balance and
 * shrinks the node-level file LRU and the per-memcg anon LRUs.
 *
 * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd.
 */
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/hyperhold_inf.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_HYPERHOLD_FILE_LRU
#include <linux/memcg_policy.h>
#include "internal.h"
#endif

/*
 * Anon scanning is pointless when the caller forbids swapping, swappiness
 * is zero, or no swap space is left.
 */
static inline bool is_swap_not_allowed(struct scan_control *sc, int swappiness)
{
	return !sc->may_swap || !swappiness || !get_nr_swap_pages();
}

/*
 * From 0 .. 100. Higher means more swappy.
 */
#define HYPERHOLD_SWAPPINESS 100

/*
 * Force maximum swappiness while hyperhold is enabled; otherwise fall back
 * to the global vm_swappiness tunable.
 */
static int get_hyperhold_swappiness(void)
{
	return is_hyperhold_enable() ?
		HYPERHOLD_SWAPPINESS : vm_swappiness;
}

/*
 * get_scan_count_hyperhold - decide how many pages to scan per LRU list.
 * @pgdat:	node being reclaimed
 * @sc:		scan control for this reclaim pass
 * @nr:		output: pages to scan for each evictable LRU list
 * @lru_pages:	output: total size of all evictable LRU lists considered
 *
 * Node-level variant of get_scan_count(): the balance is computed against
 * the node lruvec rather than a memcg lruvec.
 */
static void get_scan_count_hyperhold(struct pglist_data *pgdat,
		struct scan_control *sc, unsigned long *nr,
		unsigned long *lru_pages)
{
	int swappiness = get_hyperhold_swappiness();
	struct lruvec *lruvec = node_lruvec(pgdat);
	u64 fraction[2];
	u64 denominator;
	enum scan_balance scan_balance;
	unsigned long ap, fp;
	enum lru_list lru;
	unsigned long pgdatfile;
	unsigned long pgdatfree;
	int z;
	unsigned long anon_cost, file_cost, total_cost;
	unsigned long total_high_wmark = 0;


	/* Memcg-targeted reclaim with swapping disabled: file pages only. */
	if (cgroup_reclaim(sc) && !swappiness) {
		scan_balance = SCAN_FILE;
		goto out;
	}

	/*
	 * Do not apply any pressure balancing cleverness when the
	 * system is close to OOM, scan both anon and file equally
	 * (unless the swappiness setting disagrees with swapping).
	 */
	if (!sc->priority && swappiness) {
		scan_balance = SCAN_EQUAL;
		goto out;
	}

	if (!cgroup_reclaim(sc)) {
		pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
		pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) +
			node_page_state(pgdat, NR_INACTIVE_FILE);

		for (z = 0; z < MAX_NR_ZONES; z++) {
			struct zone *zone = &pgdat->node_zones[z];

			if (!managed_zone(zone))
				continue;

			total_high_wmark += high_wmark_pages(zone);
		}

		if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) {
			/*
			 * Force SCAN_ANON if there are enough inactive
			 * anonymous pages on the LRU in eligible zones.
			 * Otherwise, the small LRU gets thrashed.
			 */
			if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON) &&
			    (lruvec_lru_size(lruvec, LRU_INACTIVE_ANON,
					sc->reclaim_idx) >>
					(unsigned int)sc->priority)) {
				scan_balance = SCAN_ANON;
				goto out;
			}
		}
	}

	/*
	 * If there is enough inactive page cache, i.e. if the size of the
	 * inactive list is greater than that of the active list *and* the
	 * inactive list actually has some pages to scan on this priority, we
	 * do not reclaim anything from the anonymous working set right now.
	 * Without the second condition we could end up never scanning an
	 * lruvec even if it has plenty of old anonymous pages unless the
	 * system is under heavy pressure.
	 */

	if (!IS_ENABLED(CONFIG_BALANCE_ANON_FILE_RECLAIM) &&
	    !inactive_is_low(lruvec, LRU_INACTIVE_FILE) &&
	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
		scan_balance = SCAN_FILE;
		goto out;
	}

	scan_balance = SCAN_FRACT;

	/*
	 * Calculate the pressure balance between anon and file pages.
	 *
	 * The amount of pressure we put on each LRU is inversely
	 * proportional to the cost of reclaiming each list, as
	 * determined by the share of pages that are refaulting, times
	 * the relative IO cost of bringing back a swapped out
	 * anonymous page vs reloading a filesystem page (swappiness).
	 *
	 * Although we limit that influence to ensure no list gets
	 * left behind completely: at least a third of the pressure is
	 * applied, before swappiness.
	 *
	 * With swappiness at 100, anon and file have equal IO cost.
	 */
	total_cost = sc->anon_cost + sc->file_cost;
	anon_cost = total_cost + sc->anon_cost;
	file_cost = total_cost + sc->file_cost;
	/*
	 * NOTE(review): total_cost is recomputed from the boosted per-list
	 * costs before being used below, which differs from mainline
	 * get_scan_count() where the original sum is used — confirm this
	 * weighting is intentional.
	 */
	total_cost = anon_cost + file_cost;

	ap = swappiness * (total_cost + 1);
	ap /= anon_cost + 1;

	fp = (200 - swappiness) * (total_cost + 1);
	fp /= file_cost + 1;

	fraction[0] = ap;
	fraction[1] = fp;
	denominator = ap + fp;

out:
	*lru_pages = 0;
	for_each_evictable_lru(lru) {
		int file = is_file_lru(lru);
		unsigned long lruvec_size;
		unsigned long scan;

		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
		scan = lruvec_size;
		*lru_pages += scan;
		scan >>= sc->priority;

		switch (scan_balance) {
		case SCAN_EQUAL:
			/* Scan lists relative to size */
			break;
		case SCAN_FRACT:
			/*
			 * Scan types proportional to swappiness and
			 * their relative recent reclaim efficiency.
			 * Make sure we don't miss the last page on
			 * the offlined memory cgroups because of a
			 * round-off error.
			 */
			scan = DIV64_U64_ROUND_UP(scan * fraction[file],
						denominator);
			break;
		case SCAN_FILE:
		case SCAN_ANON:
			/* Scan one type exclusively */
			if ((scan_balance == SCAN_FILE) != file)
				scan = 0;
			break;
		default:
			/* Look ma, no brain */
			BUG();
		}

		nr[lru] = scan;
	}
}

/* Cap on sc->isolate_count before direct reclaim bails out early. */
#define ISOLATE_LIMIT_CNT 5

/*
 * shrink_anon_memcg - shrink the anon LRU lists of one memcg on one node.
 * @pgdat:	node to reclaim from
 * @memcg:	memcg whose lruvec is shrunk
 * @sc:		scan control; nr_reclaimed/nr_reclaimed_anon are updated
 * @nr:		in/out scan budget for LRU_INACTIVE_ANON / LRU_ACTIVE_ANON;
 *		decremented as pages are scanned
 *
 * Scans in SWAP_CLUSTER_MAX batches under a block plug until the budget is
 * spent, the reclaim target is met, or direct reclaim has isolated too much.
 */
void shrink_anon_memcg(struct pglist_data *pgdat,
		struct mem_cgroup *memcg, struct scan_control *sc,
		unsigned long *nr)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
	unsigned long nr_to_scan;
	enum lru_list lru;
	unsigned long nr_reclaimed = 0;
	struct blk_plug plug;

	blk_start_plug(&plug);

	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_ANON]) {
		for (lru = 0; lru <= LRU_ACTIVE_ANON; lru++) {
			if (nr[lru]) {
				nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
				nr[lru] -= nr_to_scan;
				nr_reclaimed +=
					shrink_list(lru, nr_to_scan,
							lruvec, sc);
			}
		}
		/* Stop early once the target is met or isolation is excessive. */
		if (sc->nr_reclaimed >= sc->nr_to_reclaim ||
			(sc->isolate_count > ISOLATE_LIMIT_CNT &&
			sc->invoker == DIRECT_RECLAIM))
			break;
	}
	blk_finish_plug(&plug);
	sc->nr_reclaimed += nr_reclaimed;
	sc->nr_reclaimed_anon += nr_reclaimed;
}

/*
 * Return true if @mcg is @tmcg itself or a descendant of it.
 * A NULL @tmcg means "no target restriction" and matches every memcg.
 */
static inline bool memcg_is_child_of(struct mem_cgroup *mcg, struct mem_cgroup *tmcg)
{
	if (tmcg == NULL)
		return true;

	while (!mem_cgroup_is_root(mcg)) {
		if (mcg == tmcg)
			break;

		mcg = parent_mem_cgroup(mcg);
	}

	return (mcg == tmcg);
}

/*
 * shrink_anon - walk all memcgs (via get_next_memcg) under the reclaim
 * target and shrink each one's anon LRUs.
 * @pgdat:	node to reclaim from
 * @sc:		scan control for this pass
 * @nr:		node-wide anon scan budget; split between memcgs in
 *		proportion to each memcg's share of the node's anon LRU size
 *
 * Honors memcg min/low protection, shrinks slab alongside each memcg, and
 * reports pressure via vmpressure().
 */
static void shrink_anon(struct pglist_data *pgdat,
		struct scan_control *sc, unsigned long *nr)
{
	unsigned long reclaimed;
	unsigned long scanned;
	struct mem_cgroup *memcg = NULL;
	struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
	unsigned long nr_memcg[NR_LRU_LISTS];
	unsigned long nr_node_active = lruvec_lru_size(
			node_lruvec(pgdat), LRU_ACTIVE_ANON, MAX_NR_ZONES);
	unsigned long nr_node_inactive = lruvec_lru_size(
			node_lruvec(pgdat), LRU_INACTIVE_ANON, MAX_NR_ZONES);

	while ((memcg = get_next_memcg(memcg))) {
		struct lruvec *lruvec = NULL;

		if (!memcg_is_child_of(memcg, target_memcg))
			continue;

		lruvec = mem_cgroup_lruvec(memcg, pgdat);

		reclaimed = sc->nr_reclaimed;
		scanned = sc->nr_scanned;

		/*
		 * Split the node-wide budget proportionally to this memcg's
		 * share of the node's anon LRUs; +1 avoids division by zero.
		 */
		nr_memcg[LRU_ACTIVE_ANON] = nr[LRU_ACTIVE_ANON] *
			lruvec_lru_size(lruvec, LRU_ACTIVE_ANON,
					MAX_NR_ZONES) / (nr_node_active + 1);
		nr_memcg[LRU_INACTIVE_ANON] = nr[LRU_INACTIVE_ANON] *
			lruvec_lru_size(lruvec, LRU_INACTIVE_ANON,
					MAX_NR_ZONES) / (nr_node_inactive + 1);
		nr_memcg[LRU_ACTIVE_FILE] = 0;
		nr_memcg[LRU_INACTIVE_FILE] = 0;

		/*
		 * This loop can become CPU-bound when target memcgs
		 * aren't eligible for reclaim - either because they
		 * don't have any reclaimable pages, or because their
		 * memory is explicitly protected. Avoid soft lockups.
		 */
		cond_resched();

		mem_cgroup_calculate_protection(target_memcg, memcg);

		if (mem_cgroup_below_min(target_memcg, memcg)) {
			/*
			 * Hard protection.
			 * If there is no reclaimable memory, OOM.
			 */
			continue;
		} else if (mem_cgroup_below_low(target_memcg, memcg)) {
			/*
			 * Soft protection.
			 * Respect the protection only as long as
			 * there is an unprotected supply
			 * of reclaimable memory from other cgroups.
			 */
			if (!sc->memcg_low_reclaim) {
				sc->memcg_low_skipped = 1;
				continue;
			}
			memcg_memory_event(memcg, MEMCG_LOW);
		}

		shrink_anon_memcg(pgdat, memcg, sc, nr_memcg);
		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
				sc->priority);

		vmpressure(sc->gfp_mask, memcg, false,
				sc->nr_scanned - scanned,
				sc->nr_reclaimed - reclaimed);

		if (sc->nr_reclaimed >= sc->nr_to_reclaim ||
			(sc->isolate_count > ISOLATE_LIMIT_CNT &&
			sc->invoker == DIRECT_RECLAIM)) {
			/* Drop the iterator reference before breaking out. */
			get_next_memcg_break(memcg);
			break;
		}
	}
}

/*
 * shrink_file - shrink the node-level file LRU lists.
 * @pgdat:	node to reclaim from
 * @sc:		scan control; nr_reclaimed/nr_reclaimed_file are updated
 * @nr:		in/out scan budget for the file LRU lists; decremented as
 *		pages are scanned
 *
 * Unlike shrink_anon(), this operates on the single node lruvec (file pages
 * are kept on a node-level LRU in the hyperhold configuration).
 */
static void shrink_file(struct pglist_data *pgdat,
		struct scan_control *sc, unsigned long *nr)
{
	struct lruvec *lruvec = node_lruvec(pgdat);
	unsigned long nr_to_scan;
	enum lru_list lru;
	unsigned long nr_reclaimed = 0;
	struct blk_plug plug;

	blk_start_plug(&plug);

	while (nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
		for (lru = LRU_INACTIVE_FILE; lru <= LRU_ACTIVE_FILE; lru++) {
			if (nr[lru]) {
				nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
				nr[lru] -= nr_to_scan;
				nr_reclaimed += shrink_list(lru, nr_to_scan, lruvec, sc);
			}
		}
	}
	blk_finish_plug(&plug);
	sc->nr_reclaimed += nr_reclaimed;
	sc->nr_reclaimed_file += nr_reclaimed;
}

/*
 * shrink_node_hyperhold - hyperhold replacement for shrink_node().
 * @pgdat:	node to reclaim from
 * @sc:		scan control for this reclaim invocation
 *
 * Repeatedly computes the anon/file balance, shrinks the node file LRU and
 * the per-memcg anon LRUs, and applies the usual writeback/dirty throttling
 * heuristics, until should_continue_reclaim() says to stop.
 *
 * Returns true if any progress was made (used to reset kswapd_failures).
 */
bool shrink_node_hyperhold(struct pglist_data *pgdat, struct scan_control *sc)
{
	unsigned long nr_reclaimed;
	struct lruvec *target_lruvec;
	bool reclaimable = false;
	unsigned long file;

	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
	do {
		/* Get scan count for file and anon */
		unsigned long node_lru_pages = 0;
		unsigned long nr[NR_LRU_LISTS] = {0};

		memset(&sc->nr, 0, sizeof(sc->nr));
		nr_reclaimed = sc->nr_reclaimed;

		/*
		 * Determine the scan balance between anon and file LRUs.
		 *
		 * NOTE(review): the lock taken is the target lruvec's
		 * lru_lock, while anon_cost is read from the root memcg
		 * lruvec and file_cost from the node lruvec — confirm this
		 * lock actually covers both cost fields.
		 */
		spin_lock_irq(&target_lruvec->lru_lock);
		sc->anon_cost = mem_cgroup_lruvec(NULL, pgdat)->anon_cost;
		sc->file_cost = node_lruvec(pgdat)->file_cost;
		spin_unlock_irq(&target_lruvec->lru_lock);

		/*
		 * Target desirable inactive:active list ratios for the anon
		 * and file LRU lists.
		 */
		if (!sc->force_deactivate) {
			unsigned long refaults;

			refaults = lruvec_page_state(target_lruvec,
					WORKINGSET_ACTIVATE_ANON);
			if (refaults != target_lruvec->refaults[0] ||
				inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
				sc->may_deactivate |= DEACTIVATE_ANON;
			else
				sc->may_deactivate &= ~DEACTIVATE_ANON;

			/*
			 * When refaults are being observed, it means a new
			 * workingset is being established. Deactivate to get
			 * rid of any stale active pages quickly.
			 *
			 * With CONFIG_HYPERHOLD_FILE_LRU the file refault
			 * state lives on the node lruvec; the trailing else
			 * below binds to whichever if survives preprocessing.
			 */
#ifdef CONFIG_HYPERHOLD_FILE_LRU
			refaults = lruvec_page_state(node_lruvec(pgdat),
					WORKINGSET_ACTIVATE_FILE);
			if (refaults != node_lruvec(pgdat)->refaults[1] ||
				inactive_is_low(node_lruvec(pgdat), LRU_INACTIVE_FILE))
				sc->may_deactivate |= DEACTIVATE_FILE;
#else
			refaults = lruvec_page_state(target_lruvec,
					WORKINGSET_ACTIVATE_FILE);
			if (refaults != target_lruvec->refaults[1] ||
				inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
				sc->may_deactivate |= DEACTIVATE_FILE;
#endif
			else
				sc->may_deactivate &= ~DEACTIVATE_FILE;
		} else
			sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;

		/*
		 * If we have plenty of inactive file pages that aren't
		 * thrashing, try to reclaim those first before touching
		 * anonymous pages.
		 */
#ifdef CONFIG_HYPERHOLD_FILE_LRU
		file = lruvec_page_state(node_lruvec(pgdat), NR_INACTIVE_FILE);
#else
		file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
#endif
		if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
			sc->cache_trim_mode = 1;
		else
			sc->cache_trim_mode = 0;

		/*
		 * Prevent the reclaimer from falling into the cache trap: as
		 * cache pages start out inactive, every cache fault will tip
		 * the scan balance towards the file LRU. And as the file LRU
		 * shrinks, so does the window for rotation from references.
		 * This means we have a runaway feedback loop where a tiny
		 * thrashing file LRU becomes infinitely more attractive than
		 * anon pages. Try to detect this based on file LRU size.
		 */
		if (!cgroup_reclaim(sc)) {
			unsigned long total_high_wmark = 0;
			unsigned long free, anon;
			int z;

			free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
			file = node_page_state(pgdat, NR_ACTIVE_FILE) +
				node_page_state(pgdat, NR_INACTIVE_FILE);

			for (z = 0; z < MAX_NR_ZONES; z++) {
				struct zone *zone = &pgdat->node_zones[z];

				if (!managed_zone(zone))
					continue;

				total_high_wmark += high_wmark_pages(zone);
			}

			/*
			 * Consider anon: if that's low too, this isn't a
			 * runaway file reclaim problem, but rather just
			 * extreme pressure. Reclaim as per usual then.
			 */
			anon = node_page_state(pgdat, NR_INACTIVE_ANON);

			sc->file_is_tiny =
				file + free <= total_high_wmark &&
				!(sc->may_deactivate & DEACTIVATE_ANON) &&
				anon >> sc->priority;
		}

		get_scan_count_hyperhold(pgdat, sc, nr, &node_lru_pages);

		if (!cgroup_reclaim(sc)) {
			/* Shrink the Total-File-LRU */
			shrink_file(pgdat, sc, nr);
		}

		/* Shrink Anon by iterating score_list */
		shrink_anon(pgdat, sc, nr);

		if (sc->nr_reclaimed - nr_reclaimed)
			reclaimable = true;

		if (current_is_kswapd()) {
			/*
			 * If reclaim is isolating dirty pages under writeback,
			 * it implies that the long-lived page allocation rate
			 * is exceeding the page laundering rate. Either the
			 * global limits are not being effective at throttling
			 * processes due to the page distribution throughout
			 * zones or there is heavy usage of a slow backing
			 * device. The only option is to throttle from reclaim
			 * context which is not ideal as there is no guarantee
			 * the dirtying process is throttled in the same way
			 * balance_dirty_pages() manages.
			 *
			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
			 * count the number of pages under pages flagged for
			 * immediate reclaim and stall if any are encountered
			 * in the nr_immediate check below.
			 */
			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
				set_bit(PGDAT_WRITEBACK, &pgdat->flags);

			/* Allow kswapd to start writing pages during reclaim. */
			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
				set_bit(PGDAT_DIRTY, &pgdat->flags);

			/*
			 * If kswapd scans pages marked for immediate
			 * reclaim and under writeback (nr_immediate), it
			 * implies that pages are cycling through the LRU
			 * faster than they are written so also forcibly stall.
			 */
			if (sc->nr.immediate)
				reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
		}
		/*
		 * Legacy memcg will stall in page writeback so avoid forcibly
		 * stalling in reclaim_throttle().
		 */
		if ((current_is_kswapd() ||
			(cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
			sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
			set_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags);

		/*
		 * Stall direct reclaim for IO completions if underlying BDIs
		 * and node is congested. Allow kswapd to continue until it
		 * starts encountering unqueued dirty pages or cycling through
		 * the LRU too quickly.
		 */
		if (!current_is_kswapd() && current_may_throttle() &&
			!sc->hibernation_mode &&
			test_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags))
			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);

	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
					sc));
	/*
	 * Kswapd gives up on balancing particular nodes after too
	 * many failures to reclaim anything from them and goes to
	 * sleep. On reclaim progress, reset the failure counter. A
	 * successful direct reclaim run will revive a dormant kswapd.
	 */
	if (reclaimable)
		pgdat->kswapd_failures = 0;

	return reclaimable;
}