18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/mm/swap.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci/* 98c2ecf20Sopenharmony_ci * This file contains the default values for the operation of the 108c2ecf20Sopenharmony_ci * Linux VM subsystem. Fine-tuning documentation can be found in 118c2ecf20Sopenharmony_ci * Documentation/admin-guide/sysctl/vm.rst. 128c2ecf20Sopenharmony_ci * Started 18.12.91 138c2ecf20Sopenharmony_ci * Swap aging added 23.2.95, Stephen Tweedie. 148c2ecf20Sopenharmony_ci * Buffermem limits added 12.3.98, Rik van Riel. 158c2ecf20Sopenharmony_ci */ 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <linux/mm.h> 188c2ecf20Sopenharmony_ci#include <linux/sched.h> 198c2ecf20Sopenharmony_ci#include <linux/kernel_stat.h> 208c2ecf20Sopenharmony_ci#include <linux/swap.h> 218c2ecf20Sopenharmony_ci#include <linux/mman.h> 228c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 238c2ecf20Sopenharmony_ci#include <linux/pagevec.h> 248c2ecf20Sopenharmony_ci#include <linux/init.h> 258c2ecf20Sopenharmony_ci#include <linux/export.h> 268c2ecf20Sopenharmony_ci#include <linux/mm_inline.h> 278c2ecf20Sopenharmony_ci#include <linux/percpu_counter.h> 288c2ecf20Sopenharmony_ci#include <linux/memremap.h> 298c2ecf20Sopenharmony_ci#include <linux/percpu.h> 308c2ecf20Sopenharmony_ci#include <linux/cpu.h> 318c2ecf20Sopenharmony_ci#include <linux/notifier.h> 328c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 338c2ecf20Sopenharmony_ci#include <linux/memcontrol.h> 348c2ecf20Sopenharmony_ci#include <linux/gfp.h> 358c2ecf20Sopenharmony_ci#include <linux/uio.h> 368c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 378c2ecf20Sopenharmony_ci#include <linux/page_idle.h> 388c2ecf20Sopenharmony_ci#include <linux/local_lock.h> 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci#include "internal.h" 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 438c2ecf20Sopenharmony_ci#include <trace/events/pagemap.h> 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/* How many pages do we try to swap or page in/out together? */ 468c2ecf20Sopenharmony_ciint page_cluster; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/* Protecting only lru_rotate.pvec which requires disabling interrupts */ 498c2ecf20Sopenharmony_cistruct lru_rotate { 508c2ecf20Sopenharmony_ci local_lock_t lock; 518c2ecf20Sopenharmony_ci struct pagevec pvec; 528c2ecf20Sopenharmony_ci}; 538c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { 548c2ecf20Sopenharmony_ci .lock = INIT_LOCAL_LOCK(lock), 558c2ecf20Sopenharmony_ci}; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci/* 588c2ecf20Sopenharmony_ci * The following struct pagevec are grouped together because they are protected 598c2ecf20Sopenharmony_ci * by disabling preemption (and interrupts remain enabled). 608c2ecf20Sopenharmony_ci */ 618c2ecf20Sopenharmony_cistruct lru_pvecs { 628c2ecf20Sopenharmony_ci local_lock_t lock; 638c2ecf20Sopenharmony_ci struct pagevec lru_add; 648c2ecf20Sopenharmony_ci struct pagevec lru_deactivate_file; 658c2ecf20Sopenharmony_ci struct pagevec lru_deactivate; 668c2ecf20Sopenharmony_ci struct pagevec lru_lazyfree; 678c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 688c2ecf20Sopenharmony_ci struct pagevec activate_page; 698c2ecf20Sopenharmony_ci#endif 708c2ecf20Sopenharmony_ci}; 718c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { 728c2ecf20Sopenharmony_ci .lock = INIT_LOCAL_LOCK(lock), 738c2ecf20Sopenharmony_ci}; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci/* 768c2ecf20Sopenharmony_ci * This path almost never happens for VM activity - pages are normally 778c2ecf20Sopenharmony_ci * freed via pagevecs. But it gets used by networking. 788c2ecf20Sopenharmony_ci */ 798c2ecf20Sopenharmony_cistatic void __page_cache_release(struct page *page) 808c2ecf20Sopenharmony_ci{ 818c2ecf20Sopenharmony_ci if (PageLRU(page)) { 828c2ecf20Sopenharmony_ci pg_data_t *pgdat = page_pgdat(page); 838c2ecf20Sopenharmony_ci struct lruvec *lruvec; 848c2ecf20Sopenharmony_ci unsigned long flags; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci spin_lock_irqsave(&pgdat->lru_lock, flags); 878c2ecf20Sopenharmony_ci lruvec = mem_cgroup_page_lruvec(page, pgdat); 888c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLRU(page), page); 898c2ecf20Sopenharmony_ci __ClearPageLRU(page); 908c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, page_off_lru(page)); 918c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, flags); 928c2ecf20Sopenharmony_ci } 938c2ecf20Sopenharmony_ci __ClearPageWaiters(page); 948c2ecf20Sopenharmony_ci} 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_cistatic void __put_single_page(struct page *page) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci __page_cache_release(page); 998c2ecf20Sopenharmony_ci mem_cgroup_uncharge(page); 1008c2ecf20Sopenharmony_ci free_unref_page(page); 1018c2ecf20Sopenharmony_ci} 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_cistatic void __put_compound_page(struct page *page) 1048c2ecf20Sopenharmony_ci{ 1058c2ecf20Sopenharmony_ci /* 1068c2ecf20Sopenharmony_ci * __page_cache_release() is supposed to be called for thp, not for 1078c2ecf20Sopenharmony_ci * hugetlb. This is because hugetlb page does never have PageLRU set 1088c2ecf20Sopenharmony_ci * (it's never listed to any LRU lists) and no memcg routines should 1098c2ecf20Sopenharmony_ci * be called for hugetlb (it has a separate hugetlb_cgroup.) 1108c2ecf20Sopenharmony_ci */ 1118c2ecf20Sopenharmony_ci if (!PageHuge(page)) 1128c2ecf20Sopenharmony_ci __page_cache_release(page); 1138c2ecf20Sopenharmony_ci destroy_compound_page(page); 1148c2ecf20Sopenharmony_ci} 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_civoid __put_page(struct page *page) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci if (is_zone_device_page(page)) { 1198c2ecf20Sopenharmony_ci put_dev_pagemap(page->pgmap); 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci /* 1228c2ecf20Sopenharmony_ci * The page belongs to the device that created pgmap. Do 1238c2ecf20Sopenharmony_ci * not return it to page allocator. 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci return; 1268c2ecf20Sopenharmony_ci } 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci if (unlikely(PageCompound(page))) 1298c2ecf20Sopenharmony_ci __put_compound_page(page); 1308c2ecf20Sopenharmony_ci else 1318c2ecf20Sopenharmony_ci __put_single_page(page); 1328c2ecf20Sopenharmony_ci} 1338c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__put_page); 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci/** 1368c2ecf20Sopenharmony_ci * put_pages_list() - release a list of pages 1378c2ecf20Sopenharmony_ci * @pages: list of pages threaded on page->lru 1388c2ecf20Sopenharmony_ci * 1398c2ecf20Sopenharmony_ci * Release a list of pages which are strung together on page.lru. Currently 1408c2ecf20Sopenharmony_ci * used by read_cache_pages() and related error recovery code. 1418c2ecf20Sopenharmony_ci */ 1428c2ecf20Sopenharmony_civoid put_pages_list(struct list_head *pages) 1438c2ecf20Sopenharmony_ci{ 1448c2ecf20Sopenharmony_ci while (!list_empty(pages)) { 1458c2ecf20Sopenharmony_ci struct page *victim; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci victim = lru_to_page(pages); 1488c2ecf20Sopenharmony_ci list_del(&victim->lru); 1498c2ecf20Sopenharmony_ci put_page(victim); 1508c2ecf20Sopenharmony_ci } 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(put_pages_list); 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci/* 1558c2ecf20Sopenharmony_ci * get_kernel_pages() - pin kernel pages in memory 1568c2ecf20Sopenharmony_ci * @kiov: An array of struct kvec structures 1578c2ecf20Sopenharmony_ci * @nr_segs: number of segments to pin 1588c2ecf20Sopenharmony_ci * @write: pinning for read/write, currently ignored 1598c2ecf20Sopenharmony_ci * @pages: array that receives pointers to the pages pinned. 1608c2ecf20Sopenharmony_ci * Should be at least nr_segs long. 1618c2ecf20Sopenharmony_ci * 1628c2ecf20Sopenharmony_ci * Returns number of pages pinned. This may be fewer than the number 1638c2ecf20Sopenharmony_ci * requested. If nr_pages is 0 or negative, returns 0. If no pages 1648c2ecf20Sopenharmony_ci * were pinned, returns -errno. Each page returned must be released 1658c2ecf20Sopenharmony_ci * with a put_page() call when it is finished with. 1668c2ecf20Sopenharmony_ci */ 1678c2ecf20Sopenharmony_ciint get_kernel_pages(const struct kvec *kiov, int nr_segs, int write, 1688c2ecf20Sopenharmony_ci struct page **pages) 1698c2ecf20Sopenharmony_ci{ 1708c2ecf20Sopenharmony_ci int seg; 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci for (seg = 0; seg < nr_segs; seg++) { 1738c2ecf20Sopenharmony_ci if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE)) 1748c2ecf20Sopenharmony_ci return seg; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci pages[seg] = kmap_to_page(kiov[seg].iov_base); 1778c2ecf20Sopenharmony_ci get_page(pages[seg]); 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci return seg; 1818c2ecf20Sopenharmony_ci} 1828c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(get_kernel_pages); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci/* 1858c2ecf20Sopenharmony_ci * get_kernel_page() - pin a kernel page in memory 1868c2ecf20Sopenharmony_ci * @start: starting kernel address 1878c2ecf20Sopenharmony_ci * @write: pinning for read/write, currently ignored 1888c2ecf20Sopenharmony_ci * @pages: array that receives pointer to the page pinned. 1898c2ecf20Sopenharmony_ci * Must be at least nr_segs long. 1908c2ecf20Sopenharmony_ci * 1918c2ecf20Sopenharmony_ci * Returns 1 if page is pinned. If the page was not pinned, returns 1928c2ecf20Sopenharmony_ci * -errno. The page returned must be released with a put_page() call 1938c2ecf20Sopenharmony_ci * when it is finished with. 1948c2ecf20Sopenharmony_ci */ 1958c2ecf20Sopenharmony_ciint get_kernel_page(unsigned long start, int write, struct page **pages) 1968c2ecf20Sopenharmony_ci{ 1978c2ecf20Sopenharmony_ci const struct kvec kiov = { 1988c2ecf20Sopenharmony_ci .iov_base = (void *)start, 1998c2ecf20Sopenharmony_ci .iov_len = PAGE_SIZE 2008c2ecf20Sopenharmony_ci }; 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci return get_kernel_pages(&kiov, 1, write, pages); 2038c2ecf20Sopenharmony_ci} 2048c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(get_kernel_page); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_cistatic void pagevec_lru_move_fn(struct pagevec *pvec, 2078c2ecf20Sopenharmony_ci void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), 2088c2ecf20Sopenharmony_ci void *arg) 2098c2ecf20Sopenharmony_ci{ 2108c2ecf20Sopenharmony_ci int i; 2118c2ecf20Sopenharmony_ci struct pglist_data *pgdat = NULL; 2128c2ecf20Sopenharmony_ci struct lruvec *lruvec; 2138c2ecf20Sopenharmony_ci unsigned long flags = 0; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci for (i = 0; i < pagevec_count(pvec); i++) { 2168c2ecf20Sopenharmony_ci struct page *page = pvec->pages[i]; 2178c2ecf20Sopenharmony_ci struct pglist_data *pagepgdat = page_pgdat(page); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci if (pagepgdat != pgdat) { 2208c2ecf20Sopenharmony_ci if (pgdat) 2218c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, flags); 2228c2ecf20Sopenharmony_ci pgdat = pagepgdat; 2238c2ecf20Sopenharmony_ci spin_lock_irqsave(&pgdat->lru_lock, flags); 2248c2ecf20Sopenharmony_ci } 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci lruvec = mem_cgroup_page_lruvec(page, pgdat); 2278c2ecf20Sopenharmony_ci (*move_fn)(page, lruvec, arg); 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci if (pgdat) 2308c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pgdat->lru_lock, flags); 2318c2ecf20Sopenharmony_ci release_pages(pvec->pages, pvec->nr); 2328c2ecf20Sopenharmony_ci pagevec_reinit(pvec); 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_cistatic void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, 2368c2ecf20Sopenharmony_ci void *arg) 2378c2ecf20Sopenharmony_ci{ 2388c2ecf20Sopenharmony_ci int *pgmoved = arg; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci if (PageLRU(page) && !PageUnevictable(page)) { 2418c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, page_lru(page)); 2428c2ecf20Sopenharmony_ci ClearPageActive(page); 2438c2ecf20Sopenharmony_ci add_page_to_lru_list_tail(page, lruvec, page_lru(page)); 2448c2ecf20Sopenharmony_ci (*pgmoved) += thp_nr_pages(page); 2458c2ecf20Sopenharmony_ci } 2468c2ecf20Sopenharmony_ci} 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci/* 2498c2ecf20Sopenharmony_ci * pagevec_move_tail() must be called with IRQ disabled. 2508c2ecf20Sopenharmony_ci * Otherwise this may cause nasty races. 2518c2ecf20Sopenharmony_ci */ 2528c2ecf20Sopenharmony_cistatic void pagevec_move_tail(struct pagevec *pvec) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci int pgmoved = 0; 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved); 2578c2ecf20Sopenharmony_ci __count_vm_events(PGROTATED, pgmoved); 2588c2ecf20Sopenharmony_ci} 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci/* 2618c2ecf20Sopenharmony_ci * Writeback is about to end against a page which has been marked for immediate 2628c2ecf20Sopenharmony_ci * reclaim. If it still appears to be reclaimable, move it to the tail of the 2638c2ecf20Sopenharmony_ci * inactive list. 2648c2ecf20Sopenharmony_ci */ 2658c2ecf20Sopenharmony_civoid rotate_reclaimable_page(struct page *page) 2668c2ecf20Sopenharmony_ci{ 2678c2ecf20Sopenharmony_ci if (!PageLocked(page) && !PageDirty(page) && 2688c2ecf20Sopenharmony_ci !PageUnevictable(page) && PageLRU(page)) { 2698c2ecf20Sopenharmony_ci struct pagevec *pvec; 2708c2ecf20Sopenharmony_ci unsigned long flags; 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci get_page(page); 2738c2ecf20Sopenharmony_ci local_lock_irqsave(&lru_rotate.lock, flags); 2748c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_rotate.pvec); 2758c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 2768c2ecf20Sopenharmony_ci pagevec_move_tail(pvec); 2778c2ecf20Sopenharmony_ci local_unlock_irqrestore(&lru_rotate.lock, flags); 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci} 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_civoid lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) 2828c2ecf20Sopenharmony_ci{ 2838c2ecf20Sopenharmony_ci do { 2848c2ecf20Sopenharmony_ci unsigned long lrusize; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci /* Record cost event */ 2878c2ecf20Sopenharmony_ci if (file) 2888c2ecf20Sopenharmony_ci lruvec->file_cost += nr_pages; 2898c2ecf20Sopenharmony_ci else 2908c2ecf20Sopenharmony_ci lruvec->anon_cost += nr_pages; 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci /* 2938c2ecf20Sopenharmony_ci * Decay previous events 2948c2ecf20Sopenharmony_ci * 2958c2ecf20Sopenharmony_ci * Because workloads change over time (and to avoid 2968c2ecf20Sopenharmony_ci * overflow) we keep these statistics as a floating 2978c2ecf20Sopenharmony_ci * average, which ends up weighing recent refaults 2988c2ecf20Sopenharmony_ci * more than old ones. 2998c2ecf20Sopenharmony_ci */ 3008c2ecf20Sopenharmony_ci lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) + 3018c2ecf20Sopenharmony_ci lruvec_page_state(lruvec, NR_ACTIVE_ANON) + 3028c2ecf20Sopenharmony_ci lruvec_page_state(lruvec, NR_INACTIVE_FILE) + 3038c2ecf20Sopenharmony_ci lruvec_page_state(lruvec, NR_ACTIVE_FILE); 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { 3068c2ecf20Sopenharmony_ci lruvec->file_cost /= 2; 3078c2ecf20Sopenharmony_ci lruvec->anon_cost /= 2; 3088c2ecf20Sopenharmony_ci } 3098c2ecf20Sopenharmony_ci } while ((lruvec = parent_lruvec(lruvec))); 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_civoid lru_note_cost_page(struct page *page) 3138c2ecf20Sopenharmony_ci{ 3148c2ecf20Sopenharmony_ci#ifdef CONFIG_HYPERHOLD_FILE_LRU 3158c2ecf20Sopenharmony_ci if (page_is_file_lru(page)) { 3168c2ecf20Sopenharmony_ci lru_note_cost(&(page_pgdat(page)->__lruvec), 1, thp_nr_pages(page)); 3178c2ecf20Sopenharmony_ci return; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci#endif 3208c2ecf20Sopenharmony_ci lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)), 3218c2ecf20Sopenharmony_ci page_is_file_lru(page), thp_nr_pages(page)); 3228c2ecf20Sopenharmony_ci} 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_cistatic void __activate_page(struct page *page, struct lruvec *lruvec, 3258c2ecf20Sopenharmony_ci void *arg) 3268c2ecf20Sopenharmony_ci{ 3278c2ecf20Sopenharmony_ci if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 3288c2ecf20Sopenharmony_ci int lru = page_lru_base_type(page); 3298c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 3308c2ecf20Sopenharmony_ci 3318c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, lru); 3328c2ecf20Sopenharmony_ci SetPageActive(page); 3338c2ecf20Sopenharmony_ci lru += LRU_ACTIVE; 3348c2ecf20Sopenharmony_ci add_page_to_lru_list(page, lruvec, lru); 3358c2ecf20Sopenharmony_ci trace_mm_lru_activate(page); 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci __count_vm_events(PGACTIVATE, nr_pages); 3388c2ecf20Sopenharmony_ci __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, 3398c2ecf20Sopenharmony_ci nr_pages); 3408c2ecf20Sopenharmony_ci } 3418c2ecf20Sopenharmony_ci} 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 3448c2ecf20Sopenharmony_cistatic void activate_page_drain(int cpu) 3458c2ecf20Sopenharmony_ci{ 3468c2ecf20Sopenharmony_ci struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci if (pagevec_count(pvec)) 3498c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, __activate_page, NULL); 3508c2ecf20Sopenharmony_ci} 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_cistatic bool need_activate_page_drain(int cpu) 3538c2ecf20Sopenharmony_ci{ 3548c2ecf20Sopenharmony_ci return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; 3558c2ecf20Sopenharmony_ci} 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_cistatic void activate_page(struct page *page) 3588c2ecf20Sopenharmony_ci{ 3598c2ecf20Sopenharmony_ci page = compound_head(page); 3608c2ecf20Sopenharmony_ci if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 3618c2ecf20Sopenharmony_ci struct pagevec *pvec; 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 3648c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.activate_page); 3658c2ecf20Sopenharmony_ci get_page(page); 3668c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 3678c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, __activate_page, NULL); 3688c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 3698c2ecf20Sopenharmony_ci } 3708c2ecf20Sopenharmony_ci} 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci#else 3738c2ecf20Sopenharmony_cistatic inline void activate_page_drain(int cpu) 3748c2ecf20Sopenharmony_ci{ 3758c2ecf20Sopenharmony_ci} 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_cistatic void activate_page(struct page *page) 3788c2ecf20Sopenharmony_ci{ 3798c2ecf20Sopenharmony_ci pg_data_t *pgdat = page_pgdat(page); 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci page = compound_head(page); 3828c2ecf20Sopenharmony_ci spin_lock_irq(&pgdat->lru_lock); 3838c2ecf20Sopenharmony_ci __activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL); 3848c2ecf20Sopenharmony_ci spin_unlock_irq(&pgdat->lru_lock); 3858c2ecf20Sopenharmony_ci} 3868c2ecf20Sopenharmony_ci#endif 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_cistatic void __lru_cache_activate_page(struct page *page) 3898c2ecf20Sopenharmony_ci{ 3908c2ecf20Sopenharmony_ci struct pagevec *pvec; 3918c2ecf20Sopenharmony_ci int i; 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 3948c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.lru_add); 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci /* 3978c2ecf20Sopenharmony_ci * Search backwards on the optimistic assumption that the page being 3988c2ecf20Sopenharmony_ci * activated has just been added to this pagevec. Note that only 3998c2ecf20Sopenharmony_ci * the local pagevec is examined as a !PageLRU page could be in the 4008c2ecf20Sopenharmony_ci * process of being released, reclaimed, migrated or on a remote 4018c2ecf20Sopenharmony_ci * pagevec that is currently being drained. Furthermore, marking 4028c2ecf20Sopenharmony_ci * a remote pagevec's page PageActive potentially hits a race where 4038c2ecf20Sopenharmony_ci * a page is marked PageActive just after it is added to the inactive 4048c2ecf20Sopenharmony_ci * list causing accounting errors and BUG_ON checks to trigger. 4058c2ecf20Sopenharmony_ci */ 4068c2ecf20Sopenharmony_ci for (i = pagevec_count(pvec) - 1; i >= 0; i--) { 4078c2ecf20Sopenharmony_ci struct page *pagevec_page = pvec->pages[i]; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci if (pagevec_page == page) { 4108c2ecf20Sopenharmony_ci SetPageActive(page); 4118c2ecf20Sopenharmony_ci break; 4128c2ecf20Sopenharmony_ci } 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 4168c2ecf20Sopenharmony_ci} 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci/* 4198c2ecf20Sopenharmony_ci * Mark a page as having seen activity. 4208c2ecf20Sopenharmony_ci * 4218c2ecf20Sopenharmony_ci * inactive,unreferenced -> inactive,referenced 4228c2ecf20Sopenharmony_ci * inactive,referenced -> active,unreferenced 4238c2ecf20Sopenharmony_ci * active,unreferenced -> active,referenced 4248c2ecf20Sopenharmony_ci * 4258c2ecf20Sopenharmony_ci * When a newly allocated page is not yet visible, so safe for non-atomic ops, 4268c2ecf20Sopenharmony_ci * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). 4278c2ecf20Sopenharmony_ci */ 4288c2ecf20Sopenharmony_civoid mark_page_accessed(struct page *page) 4298c2ecf20Sopenharmony_ci{ 4308c2ecf20Sopenharmony_ci page = compound_head(page); 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci if (!PageReferenced(page)) { 4338c2ecf20Sopenharmony_ci SetPageReferenced(page); 4348c2ecf20Sopenharmony_ci } else if (PageUnevictable(page)) { 4358c2ecf20Sopenharmony_ci /* 4368c2ecf20Sopenharmony_ci * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, 4378c2ecf20Sopenharmony_ci * this list is never rotated or maintained, so marking an 4388c2ecf20Sopenharmony_ci * evictable page accessed has no effect. 4398c2ecf20Sopenharmony_ci */ 4408c2ecf20Sopenharmony_ci } else if (!PageActive(page)) { 4418c2ecf20Sopenharmony_ci /* 4428c2ecf20Sopenharmony_ci * If the page is on the LRU, queue it for activation via 4438c2ecf20Sopenharmony_ci * lru_pvecs.activate_page. Otherwise, assume the page is on a 4448c2ecf20Sopenharmony_ci * pagevec, mark it active and it'll be moved to the active 4458c2ecf20Sopenharmony_ci * LRU on the next drain. 4468c2ecf20Sopenharmony_ci */ 4478c2ecf20Sopenharmony_ci if (PageLRU(page)) 4488c2ecf20Sopenharmony_ci activate_page(page); 4498c2ecf20Sopenharmony_ci else 4508c2ecf20Sopenharmony_ci __lru_cache_activate_page(page); 4518c2ecf20Sopenharmony_ci ClearPageReferenced(page); 4528c2ecf20Sopenharmony_ci workingset_activation(page); 4538c2ecf20Sopenharmony_ci } 4548c2ecf20Sopenharmony_ci if (page_is_idle(page)) 4558c2ecf20Sopenharmony_ci clear_page_idle(page); 4568c2ecf20Sopenharmony_ci} 4578c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mark_page_accessed); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci/** 4608c2ecf20Sopenharmony_ci * lru_cache_add - add a page to a page list 4618c2ecf20Sopenharmony_ci * @page: the page to be added to the LRU. 4628c2ecf20Sopenharmony_ci * 4638c2ecf20Sopenharmony_ci * Queue the page for addition to the LRU via pagevec. The decision on whether 4648c2ecf20Sopenharmony_ci * to add the page to the [in]active [file|anon] list is deferred until the 4658c2ecf20Sopenharmony_ci * pagevec is drained. This gives a chance for the caller of lru_cache_add() 4668c2ecf20Sopenharmony_ci * have the page added to the active list using mark_page_accessed(). 4678c2ecf20Sopenharmony_ci */ 4688c2ecf20Sopenharmony_civoid lru_cache_add(struct page *page) 4698c2ecf20Sopenharmony_ci{ 4708c2ecf20Sopenharmony_ci struct pagevec *pvec; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); 4738c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageLRU(page), page); 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci get_page(page); 4768c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 4778c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.lru_add); 4788c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 4798c2ecf20Sopenharmony_ci __pagevec_lru_add(pvec); 4808c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 4818c2ecf20Sopenharmony_ci} 4828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(lru_cache_add); 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci/** 4858c2ecf20Sopenharmony_ci * lru_cache_add_inactive_or_unevictable 4868c2ecf20Sopenharmony_ci * @page: the page to be added to LRU 4878c2ecf20Sopenharmony_ci * @vma: vma in which page is mapped for determining reclaimability 4888c2ecf20Sopenharmony_ci * 4898c2ecf20Sopenharmony_ci * Place @page on the inactive or unevictable LRU list, depending on its 4908c2ecf20Sopenharmony_ci * evictability. 4918c2ecf20Sopenharmony_ci */ 4928c2ecf20Sopenharmony_civoid lru_cache_add_inactive_or_unevictable(struct page *page, 4938c2ecf20Sopenharmony_ci struct vm_area_struct *vma) 4948c2ecf20Sopenharmony_ci{ 4958c2ecf20Sopenharmony_ci bool unevictable; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageLRU(page), page); 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; 5008c2ecf20Sopenharmony_ci if (unlikely(unevictable) && !TestSetPageMlocked(page)) { 5018c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 5028c2ecf20Sopenharmony_ci /* 5038c2ecf20Sopenharmony_ci * We use the irq-unsafe __mod_zone_page_stat because this 5048c2ecf20Sopenharmony_ci * counter is not modified from interrupt context, and the pte 5058c2ecf20Sopenharmony_ci * lock is held(spinlock), which implies preemption disabled. 5068c2ecf20Sopenharmony_ci */ 5078c2ecf20Sopenharmony_ci __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); 5088c2ecf20Sopenharmony_ci count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); 5098c2ecf20Sopenharmony_ci } 5108c2ecf20Sopenharmony_ci lru_cache_add(page); 5118c2ecf20Sopenharmony_ci} 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci/* 5148c2ecf20Sopenharmony_ci * If the page can not be invalidated, it is moved to the 5158c2ecf20Sopenharmony_ci * inactive list to speed up its reclaim. It is moved to the 5168c2ecf20Sopenharmony_ci * head of the list, rather than the tail, to give the flusher 5178c2ecf20Sopenharmony_ci * threads some time to write it out, as this is much more 5188c2ecf20Sopenharmony_ci * effective than the single-page writeout from reclaim. 5198c2ecf20Sopenharmony_ci * 5208c2ecf20Sopenharmony_ci * If the page isn't page_mapped and dirty/writeback, the page 5218c2ecf20Sopenharmony_ci * could reclaim asap using PG_reclaim. 5228c2ecf20Sopenharmony_ci * 5238c2ecf20Sopenharmony_ci * 1. active, mapped page -> none 5248c2ecf20Sopenharmony_ci * 2. active, dirty/writeback page -> inactive, head, PG_reclaim 5258c2ecf20Sopenharmony_ci * 3. inactive, mapped page -> none 5268c2ecf20Sopenharmony_ci * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim 5278c2ecf20Sopenharmony_ci * 5. inactive, clean -> inactive, tail 5288c2ecf20Sopenharmony_ci * 6. Others -> none 5298c2ecf20Sopenharmony_ci * 5308c2ecf20Sopenharmony_ci * In 4, why it moves inactive's head, the VM expects the page would 5318c2ecf20Sopenharmony_ci * be write it out by flusher threads as this is much more effective 5328c2ecf20Sopenharmony_ci * than the single-page writeout from reclaim. 5338c2ecf20Sopenharmony_ci */ 5348c2ecf20Sopenharmony_cistatic void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, 5358c2ecf20Sopenharmony_ci void *arg) 5368c2ecf20Sopenharmony_ci{ 5378c2ecf20Sopenharmony_ci int lru; 5388c2ecf20Sopenharmony_ci bool active; 5398c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci if (!PageLRU(page)) 5428c2ecf20Sopenharmony_ci return; 5438c2ecf20Sopenharmony_ci 5448c2ecf20Sopenharmony_ci if (PageUnevictable(page)) 5458c2ecf20Sopenharmony_ci return; 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci /* Some processes are using the page */ 5488c2ecf20Sopenharmony_ci if (page_mapped(page)) 5498c2ecf20Sopenharmony_ci return; 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci active = PageActive(page); 5528c2ecf20Sopenharmony_ci lru = page_lru_base_type(page); 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, lru + active); 5558c2ecf20Sopenharmony_ci ClearPageActive(page); 5568c2ecf20Sopenharmony_ci ClearPageReferenced(page); 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_ci if (PageWriteback(page) || PageDirty(page)) { 5598c2ecf20Sopenharmony_ci /* 5608c2ecf20Sopenharmony_ci * PG_reclaim could be raced with end_page_writeback 5618c2ecf20Sopenharmony_ci * It can make readahead confusing. But race window 5628c2ecf20Sopenharmony_ci * is _really_ small and it's non-critical problem. 5638c2ecf20Sopenharmony_ci */ 5648c2ecf20Sopenharmony_ci add_page_to_lru_list(page, lruvec, lru); 5658c2ecf20Sopenharmony_ci SetPageReclaim(page); 5668c2ecf20Sopenharmony_ci } else { 5678c2ecf20Sopenharmony_ci /* 5688c2ecf20Sopenharmony_ci * The page's writeback ends up during pagevec 5698c2ecf20Sopenharmony_ci * We moves tha page into tail of inactive. 5708c2ecf20Sopenharmony_ci */ 5718c2ecf20Sopenharmony_ci add_page_to_lru_list_tail(page, lruvec, lru); 5728c2ecf20Sopenharmony_ci __count_vm_events(PGROTATED, nr_pages); 5738c2ecf20Sopenharmony_ci } 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci if (active) { 5768c2ecf20Sopenharmony_ci __count_vm_events(PGDEACTIVATE, nr_pages); 5778c2ecf20Sopenharmony_ci __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, 5788c2ecf20Sopenharmony_ci nr_pages); 5798c2ecf20Sopenharmony_ci } 5808c2ecf20Sopenharmony_ci} 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_cistatic void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, 5838c2ecf20Sopenharmony_ci void *arg) 5848c2ecf20Sopenharmony_ci{ 5858c2ecf20Sopenharmony_ci if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { 5868c2ecf20Sopenharmony_ci int lru = page_lru_base_type(page); 5878c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); 5908c2ecf20Sopenharmony_ci ClearPageActive(page); 5918c2ecf20Sopenharmony_ci ClearPageReferenced(page); 5928c2ecf20Sopenharmony_ci add_page_to_lru_list(page, lruvec, lru); 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci __count_vm_events(PGDEACTIVATE, nr_pages); 5958c2ecf20Sopenharmony_ci __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, 5968c2ecf20Sopenharmony_ci nr_pages); 5978c2ecf20Sopenharmony_ci } 5988c2ecf20Sopenharmony_ci} 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_cistatic void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, 6018c2ecf20Sopenharmony_ci void *arg) 6028c2ecf20Sopenharmony_ci{ 6038c2ecf20Sopenharmony_ci if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && 6048c2ecf20Sopenharmony_ci !PageSwapCache(page) && !PageUnevictable(page)) { 6058c2ecf20Sopenharmony_ci bool active = PageActive(page); 6068c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, 6098c2ecf20Sopenharmony_ci LRU_INACTIVE_ANON + active); 6108c2ecf20Sopenharmony_ci ClearPageActive(page); 6118c2ecf20Sopenharmony_ci ClearPageReferenced(page); 6128c2ecf20Sopenharmony_ci /* 6138c2ecf20Sopenharmony_ci * Lazyfree pages are clean anonymous pages. They have 6148c2ecf20Sopenharmony_ci * PG_swapbacked flag cleared, to distinguish them from normal 6158c2ecf20Sopenharmony_ci * anonymous pages 6168c2ecf20Sopenharmony_ci */ 6178c2ecf20Sopenharmony_ci ClearPageSwapBacked(page); 6188c2ecf20Sopenharmony_ci add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci __count_vm_events(PGLAZYFREE, nr_pages); 6218c2ecf20Sopenharmony_ci __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, 6228c2ecf20Sopenharmony_ci nr_pages); 6238c2ecf20Sopenharmony_ci } 6248c2ecf20Sopenharmony_ci} 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci/* 6278c2ecf20Sopenharmony_ci * Drain pages out of the cpu's pagevecs. 6288c2ecf20Sopenharmony_ci * Either "cpu" is the current CPU, and preemption has already been 6298c2ecf20Sopenharmony_ci * disabled; or "cpu" is being hot-unplugged, and is already dead. 6308c2ecf20Sopenharmony_ci */ 6318c2ecf20Sopenharmony_civoid lru_add_drain_cpu(int cpu) 6328c2ecf20Sopenharmony_ci{ 6338c2ecf20Sopenharmony_ci struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu); 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci if (pagevec_count(pvec)) 6368c2ecf20Sopenharmony_ci __pagevec_lru_add(pvec); 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci pvec = &per_cpu(lru_rotate.pvec, cpu); 6398c2ecf20Sopenharmony_ci /* Disabling interrupts below acts as a compiler barrier. */ 6408c2ecf20Sopenharmony_ci if (data_race(pagevec_count(pvec))) { 6418c2ecf20Sopenharmony_ci unsigned long flags; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci /* No harm done if a racing interrupt already did this */ 6448c2ecf20Sopenharmony_ci local_lock_irqsave(&lru_rotate.lock, flags); 6458c2ecf20Sopenharmony_ci pagevec_move_tail(pvec); 6468c2ecf20Sopenharmony_ci local_unlock_irqrestore(&lru_rotate.lock, flags); 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu); 6508c2ecf20Sopenharmony_ci if (pagevec_count(pvec)) 6518c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu); 6548c2ecf20Sopenharmony_ci if (pagevec_count(pvec)) 6558c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 6568c2ecf20Sopenharmony_ci 6578c2ecf20Sopenharmony_ci pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu); 6588c2ecf20Sopenharmony_ci if (pagevec_count(pvec)) 6598c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci activate_page_drain(cpu); 6628c2ecf20Sopenharmony_ci} 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci/** 6658c2ecf20Sopenharmony_ci * deactivate_file_page - forcefully deactivate a file page 6668c2ecf20Sopenharmony_ci * @page: page to deactivate 6678c2ecf20Sopenharmony_ci * 6688c2ecf20Sopenharmony_ci * This function hints the VM that @page is a good reclaim candidate, 6698c2ecf20Sopenharmony_ci * for example if its invalidation fails due to the page being dirty 6708c2ecf20Sopenharmony_ci * or under writeback. 6718c2ecf20Sopenharmony_ci */ 6728c2ecf20Sopenharmony_civoid deactivate_file_page(struct page *page) 6738c2ecf20Sopenharmony_ci{ 6748c2ecf20Sopenharmony_ci /* 6758c2ecf20Sopenharmony_ci * In a workload with many unevictable page such as mprotect, 6768c2ecf20Sopenharmony_ci * unevictable page deactivation for accelerating reclaim is pointless. 6778c2ecf20Sopenharmony_ci */ 6788c2ecf20Sopenharmony_ci if (PageUnevictable(page)) 6798c2ecf20Sopenharmony_ci return; 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci if (likely(get_page_unless_zero(page))) { 6828c2ecf20Sopenharmony_ci struct pagevec *pvec; 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 6858c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file); 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 6888c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); 6898c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 6908c2ecf20Sopenharmony_ci } 6918c2ecf20Sopenharmony_ci} 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci/* 6948c2ecf20Sopenharmony_ci * deactivate_page - deactivate a page 6958c2ecf20Sopenharmony_ci * @page: page to deactivate 6968c2ecf20Sopenharmony_ci * 6978c2ecf20Sopenharmony_ci * deactivate_page() moves @page to the inactive list if @page was on the active 6988c2ecf20Sopenharmony_ci * list and was not an unevictable page. This is done to accelerate the reclaim 6998c2ecf20Sopenharmony_ci * of @page. 7008c2ecf20Sopenharmony_ci */ 7018c2ecf20Sopenharmony_civoid deactivate_page(struct page *page) 7028c2ecf20Sopenharmony_ci{ 7038c2ecf20Sopenharmony_ci if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { 7048c2ecf20Sopenharmony_ci struct pagevec *pvec; 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 7078c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate); 7088c2ecf20Sopenharmony_ci get_page(page); 7098c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 7108c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 7118c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 7128c2ecf20Sopenharmony_ci } 7138c2ecf20Sopenharmony_ci} 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci/** 7168c2ecf20Sopenharmony_ci * mark_page_lazyfree - make an anon page lazyfree 7178c2ecf20Sopenharmony_ci * @page: page to deactivate 7188c2ecf20Sopenharmony_ci * 7198c2ecf20Sopenharmony_ci * mark_page_lazyfree() moves @page to the inactive file list. 7208c2ecf20Sopenharmony_ci * This is done to accelerate the reclaim of @page. 7218c2ecf20Sopenharmony_ci */ 7228c2ecf20Sopenharmony_civoid mark_page_lazyfree(struct page *page) 7238c2ecf20Sopenharmony_ci{ 7248c2ecf20Sopenharmony_ci if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && 7258c2ecf20Sopenharmony_ci !PageSwapCache(page) && !PageUnevictable(page)) { 7268c2ecf20Sopenharmony_ci struct pagevec *pvec; 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 7298c2ecf20Sopenharmony_ci pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree); 7308c2ecf20Sopenharmony_ci get_page(page); 7318c2ecf20Sopenharmony_ci if (!pagevec_add(pvec, page) || PageCompound(page)) 7328c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); 7338c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 7348c2ecf20Sopenharmony_ci } 7358c2ecf20Sopenharmony_ci} 7368c2ecf20Sopenharmony_ci 7378c2ecf20Sopenharmony_civoid lru_add_drain(void) 7388c2ecf20Sopenharmony_ci{ 7398c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 7408c2ecf20Sopenharmony_ci lru_add_drain_cpu(smp_processor_id()); 7418c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 7428c2ecf20Sopenharmony_ci} 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_civoid lru_add_drain_cpu_zone(struct zone *zone) 7458c2ecf20Sopenharmony_ci{ 7468c2ecf20Sopenharmony_ci local_lock(&lru_pvecs.lock); 7478c2ecf20Sopenharmony_ci lru_add_drain_cpu(smp_processor_id()); 7488c2ecf20Sopenharmony_ci drain_local_pages(zone); 7498c2ecf20Sopenharmony_ci local_unlock(&lru_pvecs.lock); 7508c2ecf20Sopenharmony_ci} 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 7538c2ecf20Sopenharmony_ci 7548c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_cistatic void lru_add_drain_per_cpu(struct work_struct *dummy) 7578c2ecf20Sopenharmony_ci{ 7588c2ecf20Sopenharmony_ci lru_add_drain(); 7598c2ecf20Sopenharmony_ci} 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci/* 7628c2ecf20Sopenharmony_ci * Doesn't need any cpu hotplug locking because we do rely on per-cpu 7638c2ecf20Sopenharmony_ci * kworkers being shut down before our page_alloc_cpu_dead callback is 7648c2ecf20Sopenharmony_ci * executed on the offlined cpu. 7658c2ecf20Sopenharmony_ci * Calling this function with cpu hotplug locks held can actually lead 7668c2ecf20Sopenharmony_ci * to obscure indirect dependencies via WQ context. 7678c2ecf20Sopenharmony_ci */ 7688c2ecf20Sopenharmony_civoid lru_add_drain_all(void) 7698c2ecf20Sopenharmony_ci{ 7708c2ecf20Sopenharmony_ci /* 7718c2ecf20Sopenharmony_ci * lru_drain_gen - Global pages generation number 7728c2ecf20Sopenharmony_ci * 7738c2ecf20Sopenharmony_ci * (A) Definition: global lru_drain_gen = x implies that all generations 7748c2ecf20Sopenharmony_ci * 0 < n <= x are already *scheduled* for draining. 7758c2ecf20Sopenharmony_ci * 7768c2ecf20Sopenharmony_ci * This is an optimization for the highly-contended use case where a 7778c2ecf20Sopenharmony_ci * user space workload keeps constantly generating a flow of pages for 7788c2ecf20Sopenharmony_ci * each CPU. 7798c2ecf20Sopenharmony_ci */ 7808c2ecf20Sopenharmony_ci static unsigned int lru_drain_gen; 7818c2ecf20Sopenharmony_ci static struct cpumask has_work; 7828c2ecf20Sopenharmony_ci static DEFINE_MUTEX(lock); 7838c2ecf20Sopenharmony_ci unsigned cpu, this_gen; 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci /* 7868c2ecf20Sopenharmony_ci * Make sure nobody triggers this path before mm_percpu_wq is fully 7878c2ecf20Sopenharmony_ci * initialized. 7888c2ecf20Sopenharmony_ci */ 7898c2ecf20Sopenharmony_ci if (WARN_ON(!mm_percpu_wq)) 7908c2ecf20Sopenharmony_ci return; 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci /* 7938c2ecf20Sopenharmony_ci * Guarantee pagevec counter stores visible by this CPU are visible to 7948c2ecf20Sopenharmony_ci * other CPUs before loading the current drain generation. 7958c2ecf20Sopenharmony_ci */ 7968c2ecf20Sopenharmony_ci smp_mb(); 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci /* 7998c2ecf20Sopenharmony_ci * (B) Locally cache global LRU draining generation number 8008c2ecf20Sopenharmony_ci * 8018c2ecf20Sopenharmony_ci * The read barrier ensures that the counter is loaded before the mutex 8028c2ecf20Sopenharmony_ci * is taken. It pairs with smp_mb() inside the mutex critical section 8038c2ecf20Sopenharmony_ci * at (D). 8048c2ecf20Sopenharmony_ci */ 8058c2ecf20Sopenharmony_ci this_gen = smp_load_acquire(&lru_drain_gen); 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci mutex_lock(&lock); 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_ci /* 8108c2ecf20Sopenharmony_ci * (C) Exit the draining operation if a newer generation, from another 8118c2ecf20Sopenharmony_ci * lru_add_drain_all(), was already scheduled for draining. Check (A). 8128c2ecf20Sopenharmony_ci */ 8138c2ecf20Sopenharmony_ci if (unlikely(this_gen != lru_drain_gen)) 8148c2ecf20Sopenharmony_ci goto done; 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci /* 8178c2ecf20Sopenharmony_ci * (D) Increment global generation number 8188c2ecf20Sopenharmony_ci * 8198c2ecf20Sopenharmony_ci * Pairs with smp_load_acquire() at (B), outside of the critical 8208c2ecf20Sopenharmony_ci * section. Use a full memory barrier to guarantee that the new global 8218c2ecf20Sopenharmony_ci * drain generation number is stored before loading pagevec counters. 8228c2ecf20Sopenharmony_ci * 8238c2ecf20Sopenharmony_ci * This pairing must be done here, before the for_each_online_cpu loop 8248c2ecf20Sopenharmony_ci * below which drains the page vectors. 8258c2ecf20Sopenharmony_ci * 8268c2ecf20Sopenharmony_ci * Let x, y, and z represent some system CPU numbers, where x < y < z. 8278c2ecf20Sopenharmony_ci * Assume CPU #z is is in the middle of the for_each_online_cpu loop 8288c2ecf20Sopenharmony_ci * below and has already reached CPU #y's per-cpu data. CPU #x comes 8298c2ecf20Sopenharmony_ci * along, adds some pages to its per-cpu vectors, then calls 8308c2ecf20Sopenharmony_ci * lru_add_drain_all(). 8318c2ecf20Sopenharmony_ci * 8328c2ecf20Sopenharmony_ci * If the paired barrier is done at any later step, e.g. after the 8338c2ecf20Sopenharmony_ci * loop, CPU #x will just exit at (C) and miss flushing out all of its 8348c2ecf20Sopenharmony_ci * added pages. 8358c2ecf20Sopenharmony_ci */ 8368c2ecf20Sopenharmony_ci WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); 8378c2ecf20Sopenharmony_ci smp_mb(); 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_ci cpumask_clear(&has_work); 8408c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 8418c2ecf20Sopenharmony_ci struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) || 8448c2ecf20Sopenharmony_ci data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) || 8458c2ecf20Sopenharmony_ci pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || 8468c2ecf20Sopenharmony_ci pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || 8478c2ecf20Sopenharmony_ci pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || 8488c2ecf20Sopenharmony_ci need_activate_page_drain(cpu)) { 8498c2ecf20Sopenharmony_ci INIT_WORK(work, lru_add_drain_per_cpu); 8508c2ecf20Sopenharmony_ci queue_work_on(cpu, mm_percpu_wq, work); 8518c2ecf20Sopenharmony_ci __cpumask_set_cpu(cpu, &has_work); 8528c2ecf20Sopenharmony_ci } 8538c2ecf20Sopenharmony_ci } 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_ci for_each_cpu(cpu, &has_work) 8568c2ecf20Sopenharmony_ci flush_work(&per_cpu(lru_add_drain_work, cpu)); 8578c2ecf20Sopenharmony_ci 8588c2ecf20Sopenharmony_cidone: 8598c2ecf20Sopenharmony_ci mutex_unlock(&lock); 8608c2ecf20Sopenharmony_ci} 8618c2ecf20Sopenharmony_ci#else 8628c2ecf20Sopenharmony_civoid lru_add_drain_all(void) 8638c2ecf20Sopenharmony_ci{ 8648c2ecf20Sopenharmony_ci lru_add_drain(); 8658c2ecf20Sopenharmony_ci} 8668c2ecf20Sopenharmony_ci#endif /* CONFIG_SMP */ 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci/** 8698c2ecf20Sopenharmony_ci * release_pages - batched put_page() 8708c2ecf20Sopenharmony_ci * @pages: array of pages to release 8718c2ecf20Sopenharmony_ci * @nr: number of pages 8728c2ecf20Sopenharmony_ci * 8738c2ecf20Sopenharmony_ci * Decrement the reference count on all the pages in @pages. If it 8748c2ecf20Sopenharmony_ci * fell to zero, remove the page from the LRU and free it. 8758c2ecf20Sopenharmony_ci */ 8768c2ecf20Sopenharmony_civoid release_pages(struct page **pages, int nr) 8778c2ecf20Sopenharmony_ci{ 8788c2ecf20Sopenharmony_ci int i; 8798c2ecf20Sopenharmony_ci LIST_HEAD(pages_to_free); 8808c2ecf20Sopenharmony_ci struct pglist_data *locked_pgdat = NULL; 8818c2ecf20Sopenharmony_ci struct lruvec *lruvec; 8828c2ecf20Sopenharmony_ci unsigned long flags; 8838c2ecf20Sopenharmony_ci unsigned int lock_batch; 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 8868c2ecf20Sopenharmony_ci struct page *page = pages[i]; 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci /* 8898c2ecf20Sopenharmony_ci * Make sure the IRQ-safe lock-holding time does not get 8908c2ecf20Sopenharmony_ci * excessive with a continuous string of pages from the 8918c2ecf20Sopenharmony_ci * same pgdat. The lock is held only if pgdat != NULL. 8928c2ecf20Sopenharmony_ci */ 8938c2ecf20Sopenharmony_ci if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) { 8948c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); 8958c2ecf20Sopenharmony_ci locked_pgdat = NULL; 8968c2ecf20Sopenharmony_ci } 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci page = compound_head(page); 8998c2ecf20Sopenharmony_ci if (is_huge_zero_page(page)) 9008c2ecf20Sopenharmony_ci continue; 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci if (is_zone_device_page(page)) { 9038c2ecf20Sopenharmony_ci if (locked_pgdat) { 9048c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&locked_pgdat->lru_lock, 9058c2ecf20Sopenharmony_ci flags); 9068c2ecf20Sopenharmony_ci locked_pgdat = NULL; 9078c2ecf20Sopenharmony_ci } 9088c2ecf20Sopenharmony_ci /* 9098c2ecf20Sopenharmony_ci * ZONE_DEVICE pages that return 'false' from 9108c2ecf20Sopenharmony_ci * page_is_devmap_managed() do not require special 9118c2ecf20Sopenharmony_ci * processing, and instead, expect a call to 9128c2ecf20Sopenharmony_ci * put_page_testzero(). 9138c2ecf20Sopenharmony_ci */ 9148c2ecf20Sopenharmony_ci if (page_is_devmap_managed(page)) { 9158c2ecf20Sopenharmony_ci put_devmap_managed_page(page); 9168c2ecf20Sopenharmony_ci continue; 9178c2ecf20Sopenharmony_ci } 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci if (!put_page_testzero(page)) 9218c2ecf20Sopenharmony_ci continue; 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci if (PageCompound(page)) { 9248c2ecf20Sopenharmony_ci if (locked_pgdat) { 9258c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); 9268c2ecf20Sopenharmony_ci locked_pgdat = NULL; 9278c2ecf20Sopenharmony_ci } 9288c2ecf20Sopenharmony_ci __put_compound_page(page); 9298c2ecf20Sopenharmony_ci continue; 9308c2ecf20Sopenharmony_ci } 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci if (PageLRU(page)) { 9338c2ecf20Sopenharmony_ci struct pglist_data *pgdat = page_pgdat(page); 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci if (pgdat != locked_pgdat) { 9368c2ecf20Sopenharmony_ci if (locked_pgdat) 9378c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&locked_pgdat->lru_lock, 9388c2ecf20Sopenharmony_ci flags); 9398c2ecf20Sopenharmony_ci lock_batch = 0; 9408c2ecf20Sopenharmony_ci locked_pgdat = pgdat; 9418c2ecf20Sopenharmony_ci spin_lock_irqsave(&locked_pgdat->lru_lock, flags); 9428c2ecf20Sopenharmony_ci } 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci lruvec = mem_cgroup_page_lruvec(page, locked_pgdat); 9458c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLRU(page), page); 9468c2ecf20Sopenharmony_ci __ClearPageLRU(page); 9478c2ecf20Sopenharmony_ci del_page_from_lru_list(page, lruvec, page_off_lru(page)); 9488c2ecf20Sopenharmony_ci } 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci __ClearPageWaiters(page); 9518c2ecf20Sopenharmony_ci 9528c2ecf20Sopenharmony_ci list_add(&page->lru, &pages_to_free); 9538c2ecf20Sopenharmony_ci } 9548c2ecf20Sopenharmony_ci if (locked_pgdat) 9558c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci mem_cgroup_uncharge_list(&pages_to_free); 9588c2ecf20Sopenharmony_ci free_unref_page_list(&pages_to_free); 9598c2ecf20Sopenharmony_ci} 9608c2ecf20Sopenharmony_ciEXPORT_SYMBOL(release_pages); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci/* 9638c2ecf20Sopenharmony_ci * The pages which we're about to release may be in the deferred lru-addition 9648c2ecf20Sopenharmony_ci * queues. That would prevent them from really being freed right now. That's 9658c2ecf20Sopenharmony_ci * OK from a correctness point of view but is inefficient - those pages may be 9668c2ecf20Sopenharmony_ci * cache-warm and we want to give them back to the page allocator ASAP. 9678c2ecf20Sopenharmony_ci * 9688c2ecf20Sopenharmony_ci * So __pagevec_release() will drain those queues here. __pagevec_lru_add() 9698c2ecf20Sopenharmony_ci * and __pagevec_lru_add_active() call release_pages() directly to avoid 9708c2ecf20Sopenharmony_ci * mutual recursion. 9718c2ecf20Sopenharmony_ci */ 9728c2ecf20Sopenharmony_civoid __pagevec_release(struct pagevec *pvec) 9738c2ecf20Sopenharmony_ci{ 9748c2ecf20Sopenharmony_ci if (!pvec->percpu_pvec_drained) { 9758c2ecf20Sopenharmony_ci lru_add_drain(); 9768c2ecf20Sopenharmony_ci pvec->percpu_pvec_drained = true; 9778c2ecf20Sopenharmony_ci } 9788c2ecf20Sopenharmony_ci release_pages(pvec->pages, pagevec_count(pvec)); 9798c2ecf20Sopenharmony_ci pagevec_reinit(pvec); 9808c2ecf20Sopenharmony_ci} 9818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__pagevec_release); 9828c2ecf20Sopenharmony_ci 9838c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 9848c2ecf20Sopenharmony_ci/* used by __split_huge_page_refcount() */ 9858c2ecf20Sopenharmony_civoid lru_add_page_tail(struct page *page, struct page *page_tail, 9868c2ecf20Sopenharmony_ci struct lruvec *lruvec, struct list_head *list) 9878c2ecf20Sopenharmony_ci{ 9888c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageHead(page), page); 9898c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageCompound(page_tail), page); 9908c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageLRU(page_tail), page); 9918c2ecf20Sopenharmony_ci lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci if (!list) 9948c2ecf20Sopenharmony_ci SetPageLRU(page_tail); 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci if (likely(PageLRU(page))) 9978c2ecf20Sopenharmony_ci list_add_tail(&page_tail->lru, &page->lru); 9988c2ecf20Sopenharmony_ci else if (list) { 9998c2ecf20Sopenharmony_ci /* page reclaim is reclaiming a huge page */ 10008c2ecf20Sopenharmony_ci get_page(page_tail); 10018c2ecf20Sopenharmony_ci list_add_tail(&page_tail->lru, list); 10028c2ecf20Sopenharmony_ci } else { 10038c2ecf20Sopenharmony_ci /* 10048c2ecf20Sopenharmony_ci * Head page has not yet been counted, as an hpage, 10058c2ecf20Sopenharmony_ci * so we must account for each subpage individually. 10068c2ecf20Sopenharmony_ci * 10078c2ecf20Sopenharmony_ci * Put page_tail on the list at the correct position 10088c2ecf20Sopenharmony_ci * so they all end up in order. 10098c2ecf20Sopenharmony_ci */ 10108c2ecf20Sopenharmony_ci add_page_to_lru_list_tail(page_tail, lruvec, 10118c2ecf20Sopenharmony_ci page_lru(page_tail)); 10128c2ecf20Sopenharmony_ci } 10138c2ecf20Sopenharmony_ci} 10148c2ecf20Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 10158c2ecf20Sopenharmony_ci 10168c2ecf20Sopenharmony_cistatic void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, 10178c2ecf20Sopenharmony_ci void *arg) 10188c2ecf20Sopenharmony_ci{ 10198c2ecf20Sopenharmony_ci enum lru_list lru; 10208c2ecf20Sopenharmony_ci int was_unevictable = TestClearPageUnevictable(page); 10218c2ecf20Sopenharmony_ci int nr_pages = thp_nr_pages(page); 10228c2ecf20Sopenharmony_ci 10238c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageLRU(page), page); 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_ci /* 10268c2ecf20Sopenharmony_ci * Page becomes evictable in two ways: 10278c2ecf20Sopenharmony_ci * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. 10288c2ecf20Sopenharmony_ci * 2) Before acquiring LRU lock to put the page to correct LRU and then 10298c2ecf20Sopenharmony_ci * a) do PageLRU check with lock [check_move_unevictable_pages] 10308c2ecf20Sopenharmony_ci * b) do PageLRU check before lock [clear_page_mlock] 10318c2ecf20Sopenharmony_ci * 10328c2ecf20Sopenharmony_ci * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need 10338c2ecf20Sopenharmony_ci * following strict ordering: 10348c2ecf20Sopenharmony_ci * 10358c2ecf20Sopenharmony_ci * #0: __pagevec_lru_add_fn #1: clear_page_mlock 10368c2ecf20Sopenharmony_ci * 10378c2ecf20Sopenharmony_ci * SetPageLRU() TestClearPageMlocked() 10388c2ecf20Sopenharmony_ci * smp_mb() // explicit ordering // above provides strict 10398c2ecf20Sopenharmony_ci * // ordering 10408c2ecf20Sopenharmony_ci * PageMlocked() PageLRU() 10418c2ecf20Sopenharmony_ci * 10428c2ecf20Sopenharmony_ci * 10438c2ecf20Sopenharmony_ci * if '#1' does not observe setting of PG_lru by '#0' and fails 10448c2ecf20Sopenharmony_ci * isolation, the explicit barrier will make sure that page_evictable 10458c2ecf20Sopenharmony_ci * check will put the page in correct LRU. Without smp_mb(), SetPageLRU 10468c2ecf20Sopenharmony_ci * can be reordered after PageMlocked check and can make '#1' to fail 10478c2ecf20Sopenharmony_ci * the isolation of the page whose Mlocked bit is cleared (#0 is also 10488c2ecf20Sopenharmony_ci * looking at the same page) and the evictable page will be stranded 10498c2ecf20Sopenharmony_ci * in an unevictable LRU. 10508c2ecf20Sopenharmony_ci */ 10518c2ecf20Sopenharmony_ci SetPageLRU(page); 10528c2ecf20Sopenharmony_ci smp_mb__after_atomic(); 10538c2ecf20Sopenharmony_ci 10548c2ecf20Sopenharmony_ci if (page_evictable(page)) { 10558c2ecf20Sopenharmony_ci lru = page_lru(page); 10568c2ecf20Sopenharmony_ci if (was_unevictable) 10578c2ecf20Sopenharmony_ci __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); 10588c2ecf20Sopenharmony_ci } else { 10598c2ecf20Sopenharmony_ci lru = LRU_UNEVICTABLE; 10608c2ecf20Sopenharmony_ci ClearPageActive(page); 10618c2ecf20Sopenharmony_ci SetPageUnevictable(page); 10628c2ecf20Sopenharmony_ci if (!was_unevictable) 10638c2ecf20Sopenharmony_ci __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); 10648c2ecf20Sopenharmony_ci } 10658c2ecf20Sopenharmony_ci 10668c2ecf20Sopenharmony_ci add_page_to_lru_list(page, lruvec, lru); 10678c2ecf20Sopenharmony_ci trace_mm_lru_insertion(page, lru); 10688c2ecf20Sopenharmony_ci} 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ci/* 10718c2ecf20Sopenharmony_ci * Add the passed pages to the LRU, then drop the caller's refcount 10728c2ecf20Sopenharmony_ci * on them. Reinitialises the caller's pagevec. 10738c2ecf20Sopenharmony_ci */ 10748c2ecf20Sopenharmony_civoid __pagevec_lru_add(struct pagevec *pvec) 10758c2ecf20Sopenharmony_ci{ 10768c2ecf20Sopenharmony_ci pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL); 10778c2ecf20Sopenharmony_ci} 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci/** 10808c2ecf20Sopenharmony_ci * pagevec_lookup_entries - gang pagecache lookup 10818c2ecf20Sopenharmony_ci * @pvec: Where the resulting entries are placed 10828c2ecf20Sopenharmony_ci * @mapping: The address_space to search 10838c2ecf20Sopenharmony_ci * @start: The starting entry index 10848c2ecf20Sopenharmony_ci * @nr_entries: The maximum number of pages 10858c2ecf20Sopenharmony_ci * @indices: The cache indices corresponding to the entries in @pvec 10868c2ecf20Sopenharmony_ci * 10878c2ecf20Sopenharmony_ci * pagevec_lookup_entries() will search for and return a group of up 10888c2ecf20Sopenharmony_ci * to @nr_pages pages and shadow entries in the mapping. All 10898c2ecf20Sopenharmony_ci * entries are placed in @pvec. pagevec_lookup_entries() takes a 10908c2ecf20Sopenharmony_ci * reference against actual pages in @pvec. 10918c2ecf20Sopenharmony_ci * 10928c2ecf20Sopenharmony_ci * The search returns a group of mapping-contiguous entries with 10938c2ecf20Sopenharmony_ci * ascending indexes. There may be holes in the indices due to 10948c2ecf20Sopenharmony_ci * not-present entries. 10958c2ecf20Sopenharmony_ci * 10968c2ecf20Sopenharmony_ci * Only one subpage of a Transparent Huge Page is returned in one call: 10978c2ecf20Sopenharmony_ci * allowing truncate_inode_pages_range() to evict the whole THP without 10988c2ecf20Sopenharmony_ci * cycling through a pagevec of extra references. 10998c2ecf20Sopenharmony_ci * 11008c2ecf20Sopenharmony_ci * pagevec_lookup_entries() returns the number of entries which were 11018c2ecf20Sopenharmony_ci * found. 11028c2ecf20Sopenharmony_ci */ 11038c2ecf20Sopenharmony_ciunsigned pagevec_lookup_entries(struct pagevec *pvec, 11048c2ecf20Sopenharmony_ci struct address_space *mapping, 11058c2ecf20Sopenharmony_ci pgoff_t start, unsigned nr_entries, 11068c2ecf20Sopenharmony_ci pgoff_t *indices) 11078c2ecf20Sopenharmony_ci{ 11088c2ecf20Sopenharmony_ci pvec->nr = find_get_entries(mapping, start, nr_entries, 11098c2ecf20Sopenharmony_ci pvec->pages, indices); 11108c2ecf20Sopenharmony_ci return pagevec_count(pvec); 11118c2ecf20Sopenharmony_ci} 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ci/** 11148c2ecf20Sopenharmony_ci * pagevec_remove_exceptionals - pagevec exceptionals pruning 11158c2ecf20Sopenharmony_ci * @pvec: The pagevec to prune 11168c2ecf20Sopenharmony_ci * 11178c2ecf20Sopenharmony_ci * pagevec_lookup_entries() fills both pages and exceptional radix 11188c2ecf20Sopenharmony_ci * tree entries into the pagevec. This function prunes all 11198c2ecf20Sopenharmony_ci * exceptionals from @pvec without leaving holes, so that it can be 11208c2ecf20Sopenharmony_ci * passed on to page-only pagevec operations. 11218c2ecf20Sopenharmony_ci */ 11228c2ecf20Sopenharmony_civoid pagevec_remove_exceptionals(struct pagevec *pvec) 11238c2ecf20Sopenharmony_ci{ 11248c2ecf20Sopenharmony_ci int i, j; 11258c2ecf20Sopenharmony_ci 11268c2ecf20Sopenharmony_ci for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 11278c2ecf20Sopenharmony_ci struct page *page = pvec->pages[i]; 11288c2ecf20Sopenharmony_ci if (!xa_is_value(page)) 11298c2ecf20Sopenharmony_ci pvec->pages[j++] = page; 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci pvec->nr = j; 11328c2ecf20Sopenharmony_ci} 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci/** 11358c2ecf20Sopenharmony_ci * pagevec_lookup_range - gang pagecache lookup 11368c2ecf20Sopenharmony_ci * @pvec: Where the resulting pages are placed 11378c2ecf20Sopenharmony_ci * @mapping: The address_space to search 11388c2ecf20Sopenharmony_ci * @start: The starting page index 11398c2ecf20Sopenharmony_ci * @end: The final page index 11408c2ecf20Sopenharmony_ci * 11418c2ecf20Sopenharmony_ci * pagevec_lookup_range() will search for & return a group of up to PAGEVEC_SIZE 11428c2ecf20Sopenharmony_ci * pages in the mapping starting from index @start and upto index @end 11438c2ecf20Sopenharmony_ci * (inclusive). The pages are placed in @pvec. pagevec_lookup() takes a 11448c2ecf20Sopenharmony_ci * reference against the pages in @pvec. 11458c2ecf20Sopenharmony_ci * 11468c2ecf20Sopenharmony_ci * The search returns a group of mapping-contiguous pages with ascending 11478c2ecf20Sopenharmony_ci * indexes. There may be holes in the indices due to not-present pages. We 11488c2ecf20Sopenharmony_ci * also update @start to index the next page for the traversal. 11498c2ecf20Sopenharmony_ci * 11508c2ecf20Sopenharmony_ci * pagevec_lookup_range() returns the number of pages which were found. If this 11518c2ecf20Sopenharmony_ci * number is smaller than PAGEVEC_SIZE, the end of specified range has been 11528c2ecf20Sopenharmony_ci * reached. 11538c2ecf20Sopenharmony_ci */ 11548c2ecf20Sopenharmony_ciunsigned pagevec_lookup_range(struct pagevec *pvec, 11558c2ecf20Sopenharmony_ci struct address_space *mapping, pgoff_t *start, pgoff_t end) 11568c2ecf20Sopenharmony_ci{ 11578c2ecf20Sopenharmony_ci pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE, 11588c2ecf20Sopenharmony_ci pvec->pages); 11598c2ecf20Sopenharmony_ci return pagevec_count(pvec); 11608c2ecf20Sopenharmony_ci} 11618c2ecf20Sopenharmony_ciEXPORT_SYMBOL(pagevec_lookup_range); 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_ciunsigned pagevec_lookup_range_tag(struct pagevec *pvec, 11648c2ecf20Sopenharmony_ci struct address_space *mapping, pgoff_t *index, pgoff_t end, 11658c2ecf20Sopenharmony_ci xa_mark_t tag) 11668c2ecf20Sopenharmony_ci{ 11678c2ecf20Sopenharmony_ci pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, 11688c2ecf20Sopenharmony_ci PAGEVEC_SIZE, pvec->pages); 11698c2ecf20Sopenharmony_ci return pagevec_count(pvec); 11708c2ecf20Sopenharmony_ci} 11718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(pagevec_lookup_range_tag); 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ciunsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, 11748c2ecf20Sopenharmony_ci struct address_space *mapping, pgoff_t *index, pgoff_t end, 11758c2ecf20Sopenharmony_ci xa_mark_t tag, unsigned max_pages) 11768c2ecf20Sopenharmony_ci{ 11778c2ecf20Sopenharmony_ci pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, 11788c2ecf20Sopenharmony_ci min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages); 11798c2ecf20Sopenharmony_ci return pagevec_count(pvec); 11808c2ecf20Sopenharmony_ci} 11818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(pagevec_lookup_range_nr_tag); 11828c2ecf20Sopenharmony_ci/* 11838c2ecf20Sopenharmony_ci * Perform any setup for the swap system 11848c2ecf20Sopenharmony_ci */ 11858c2ecf20Sopenharmony_civoid __init swap_setup(void) 11868c2ecf20Sopenharmony_ci{ 11878c2ecf20Sopenharmony_ci unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); 11888c2ecf20Sopenharmony_ci 11898c2ecf20Sopenharmony_ci /* Use a smaller cluster for small-memory machines */ 11908c2ecf20Sopenharmony_ci if (megs < 16) 11918c2ecf20Sopenharmony_ci page_cluster = 2; 11928c2ecf20Sopenharmony_ci else 11938c2ecf20Sopenharmony_ci page_cluster = 3; 11948c2ecf20Sopenharmony_ci /* 11958c2ecf20Sopenharmony_ci * Right now other parts of the system means that we 11968c2ecf20Sopenharmony_ci * _really_ don't want to cluster much more 11978c2ecf20Sopenharmony_ci */ 11988c2ecf20Sopenharmony_ci} 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci#ifdef CONFIG_DEV_PAGEMAP_OPS 12018c2ecf20Sopenharmony_civoid put_devmap_managed_page(struct page *page) 12028c2ecf20Sopenharmony_ci{ 12038c2ecf20Sopenharmony_ci int count; 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!page_is_devmap_managed(page))) 12068c2ecf20Sopenharmony_ci return; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci count = page_ref_dec_return(page); 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci /* 12118c2ecf20Sopenharmony_ci * devmap page refcounts are 1-based, rather than 0-based: if 12128c2ecf20Sopenharmony_ci * refcount is 1, then the page is free and the refcount is 12138c2ecf20Sopenharmony_ci * stable because nobody holds a reference on the page. 12148c2ecf20Sopenharmony_ci */ 12158c2ecf20Sopenharmony_ci if (count == 1) 12168c2ecf20Sopenharmony_ci free_devmap_managed_page(page); 12178c2ecf20Sopenharmony_ci else if (!count) 12188c2ecf20Sopenharmony_ci __put_page(page); 12198c2ecf20Sopenharmony_ci} 12208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(put_devmap_managed_page); 12218c2ecf20Sopenharmony_ci#endif 1222