// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
        .writepage      = swap_writepage,
        .set_page_dirty = swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
        .migratepage    = migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
static bool enable_vma_readahead __read_mostly = true;

#define SWAP_RA_WIN_SHIFT       (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK       ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX        SWAP_RA_HITS_MASK
#define SWAP_RA_WIN_MASK        (~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v)         ((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)          (((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v)         ((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)                            \
        (((addr) & PAGE_MASK) |                                 \
         (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |    \
         ((hits) & SWAP_RA_HITS_MASK))

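/*
 * For illustration only, assuming PAGE_SHIFT == 12 (so SWAP_RA_WIN_SHIFT
 * is 6): the per-VMA readahead state packs three fields into one long:
 *
 *   bits  0-5   readahead hit count        (SWAP_RA_HITS_MASK == 0x3f)
 *   bits  6-11  readahead window size      (SWAP_RA_WIN_MASK == 0xfc0)
 *   bits 12-63  page-aligned fault address (SWAP_RA_ADDR)
 *
 * e.g. SWAP_RA_VAL(0x7f0000001000, 8, 3) == 0x7f0000001000 | 0x200 | 0x3.
 */
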
/* Initial readahead hits is 4 to start up with a small window */
#define GET_SWAP_RA_VAL(vma)                                    \
        (atomic_long_read(&(vma)->swap_readahead_info) ? : 4)

#define INC_CACHE_INFO(x)       data_race(swap_cache_info.x++)
#define ADD_CACHE_INFO(x, nr)   data_race(swap_cache_info.x += (nr))

static struct {
        unsigned long add_total;
        unsigned long del_total;
        unsigned long find_success;
        unsigned long find_total;
} swap_cache_info;

unsigned long total_swapcache_pages(void)
{
        unsigned int i, j, nr;
        unsigned long ret = 0;
        struct address_space *spaces;
        struct swap_info_struct *si;

        for (i = 0; i < MAX_SWAPFILES; i++) {
                swp_entry_t entry = swp_entry(i, 1);

                /* Avoid get_swap_device() warning for a bad swap entry */
                if (!swp_swap_info(entry))
                        continue;
                /* Prevent swapoff from freeing swapper_spaces */
                si = get_swap_device(entry);
                if (!si)
                        continue;
                nr = nr_swapper_spaces[i];
                spaces = swapper_spaces[i];
                for (j = 0; j < nr; j++)
                        ret += spaces[j].nrpages;
                put_swap_device(si);
        }
        return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
        printk("%lu pages in swap cache\n", total_swapcache_pages());
        printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
                swap_cache_info.add_total, swap_cache_info.del_total,
                swap_cache_info.find_success, swap_cache_info.find_total);
        printk("Free swap  = %ldkB\n",
                get_nr_swap_pages() << (PAGE_SHIFT - 10));
        printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

void *get_shadow_from_swap_cache(swp_entry_t entry)
{
        struct address_space *address_space = swap_address_space(entry);
        pgoff_t idx = swp_offset(entry);
        struct page *page;

        page = find_get_entry(address_space, idx);
        if (xa_is_value(page))
                return page;
        if (page)
                put_page(page);
        return NULL;
}

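/*
 * Descriptive sketch of the shadow-entry life cycle as used in this file:
 * when a page is removed from the swap cache, __delete_from_swap_cache()
 * may store a workingset shadow (an xa_value) in its slot; on the next
 * swapin, add_to_swap_cache() hands that shadow back via @shadowp and
 * __read_swap_cache_async() feeds it to workingset_refault().
 * get_shadow_from_swap_cache() above is the read-only lookup of that slot.
 */
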
/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry,
                        gfp_t gfp, void **shadowp)
{
        struct address_space *address_space = swap_address_space(entry);
        pgoff_t idx = swp_offset(entry);
        XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
        unsigned long i, nr = thp_nr_pages(page);
        void *old;

        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapCache(page), page);
        VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

        page_ref_add(page, nr);
        SetPageSwapCache(page);

        do {
                unsigned long nr_shadows = 0;

                xas_lock_irq(&xas);
                xas_create_range(&xas);
                if (xas_error(&xas))
                        goto unlock;
                for (i = 0; i < nr; i++) {
                        VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
                        old = xas_load(&xas);
                        if (xa_is_value(old)) {
                                nr_shadows++;
                                if (shadowp)
                                        *shadowp = old;
                        }
                        set_page_private(page + i, entry.val + i);
                        xas_store(&xas, page);
                        xas_next(&xas);
                }
                address_space->nrexceptional -= nr_shadows;
                address_space->nrpages += nr;
                __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
                ADD_CACHE_INFO(add_total, nr);
unlock:
                xas_unlock_irq(&xas);
        } while (xas_nomem(&xas, gfp));

        if (!xas_error(&xas))
                return 0;

        ClearPageSwapCache(page);
        page_ref_sub(page, nr);
        return xas_error(&xas);
}

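/*
 * Note on the do/while loop above: it follows the usual XArray pattern.
 * The store is attempted under the i_pages lock; if a node is missing,
 * xas_nomem() allocates it with @gfp after the lock has been dropped and
 * asks for a retry, so the function may take and release the lock more
 * than once before it either succeeds or returns the allocation error.
 */
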
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page,
                        swp_entry_t entry, void *shadow)
{
        struct address_space *address_space = swap_address_space(entry);
        int i, nr = thp_nr_pages(page);
        pgoff_t idx = swp_offset(entry);
        XA_STATE(xas, &address_space->i_pages, idx);

        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageSwapCache(page), page);
        VM_BUG_ON_PAGE(PageWriteback(page), page);

        for (i = 0; i < nr; i++) {
                void *entry = xas_store(&xas, shadow);
                VM_BUG_ON_PAGE(entry != page, entry);
                set_page_private(page + i, 0);
                xas_next(&xas);
        }
        ClearPageSwapCache(page);
        if (shadow)
                address_space->nrexceptional += nr;
        address_space->nrpages -= nr;
        __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
        ADD_CACHE_INFO(del_total, nr);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
        swp_entry_t entry;
        int err;

        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageUptodate(page), page);

        entry = get_swap_page(page);
        if (!entry.val)
                return 0;

        /*
         * XArray node allocations from PF_MEMALLOC contexts could
         * completely exhaust the page allocator. __GFP_NOMEMALLOC
         * stops emergency reserves from being allocated.
         *
         * TODO: this could cause a theoretical memory reclaim
         * deadlock in the swap out path.
         */
        /*
         * Add it to the swap cache.
         */
        err = add_to_swap_cache(page, entry,
                        __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
        if (err)
                /*
                 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
                 * clear SWAP_HAS_CACHE flag.
                 */
                goto fail;
        /*
         * Normally the page will be dirtied in unmap because its pte should be
         * dirty. A special case is MADV_FREE page. The page's pte could have
         * dirty bit cleared but the page's SwapBacked bit is still set because
         * clearing the dirty bit and SwapBacked bit is not protected by any
         * lock. For such a page, unmap will not set the dirty bit, so page
         * reclaim will not write the page out. This can cause data corruption
         * when the page is swapped in later. Always setting the dirty bit for
         * the page solves the problem.
         */
        set_page_dirty(page);

        return 1;

fail:
        put_swap_page(page, entry);
        return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
        swp_entry_t entry = { .val = page_private(page) };
        struct address_space *address_space = swap_address_space(entry);

        xa_lock_irq(&address_space->i_pages);
        __delete_from_swap_cache(page, entry, NULL);
        xa_unlock_irq(&address_space->i_pages);

        put_swap_page(page, entry);
        page_ref_sub(page, thp_nr_pages(page));
}

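/*
 * Shadow entries in the range [@begin, @end] of swap device @type are
 * cleared one swap address space at a time: the shift/increment/shift on
 * 'curr' below advances it to the start of the next
 * SWAP_ADDRESS_SPACE_PAGES-sized chunk, since each chunk is backed by its
 * own address_space and therefore needs its own XA_STATE and lock.
 */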
void clear_shadow_from_swap_cache(int type, unsigned long begin,
                                unsigned long end)
{
        unsigned long curr = begin;
        void *old;

        for (;;) {
                unsigned long nr_shadows = 0;
                swp_entry_t entry = swp_entry(type, curr);
                struct address_space *address_space = swap_address_space(entry);
                XA_STATE(xas, &address_space->i_pages, curr);

                xa_lock_irq(&address_space->i_pages);
                xas_for_each(&xas, old, end) {
                        if (!xa_is_value(old))
                                continue;
                        xas_store(&xas, NULL);
                        nr_shadows++;
                }
                address_space->nrexceptional -= nr_shadows;
                xa_unlock_irq(&address_space->i_pages);

                /* search the next swapcache until we meet end */
                curr >>= SWAP_ADDRESS_SPACE_SHIFT;
                curr++;
                curr <<= SWAP_ADDRESS_SPACE_SHIFT;
                if (curr > end)
                        break;
        }
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 *                                      - Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
        if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
                try_to_free_swap(page);
                unlock_page(page);
        }
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
        free_swap_cache(page);
        if (!is_huge_zero_page(page))
                put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
        struct page **pagep = pages;
        int i;

        lru_add_drain();
        for (i = 0; i < nr; i++)
                free_swap_cache(pagep[i]);
        release_pages(pagep, nr);
}

static inline bool swap_use_vma_readahead(void)
{
        return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
                               unsigned long addr)
{
        struct page *page;
        struct swap_info_struct *si;

        si = get_swap_device(entry);
        if (!si)
                return NULL;
        page = find_get_page(swap_address_space(entry), swp_offset(entry));
        put_swap_device(si);

        INC_CACHE_INFO(find_total);
        if (page) {
                bool vma_ra = swap_use_vma_readahead();
                bool readahead;

                INC_CACHE_INFO(find_success);
                /*
                 * At the moment, we don't support PG_readahead for anon THP
                 * so let's bail out rather than confusing the readahead stat.
                 */
                if (unlikely(PageTransCompound(page)))
                        return page;

                readahead = TestClearPageReadahead(page);
                if (vma && vma_ra) {
                        unsigned long ra_val;
                        int win, hits;

                        ra_val = GET_SWAP_RA_VAL(vma);
                        win = SWAP_RA_WIN(ra_val);
                        hits = SWAP_RA_HITS(ra_val);
                        if (readahead)
                                hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
                        atomic_long_set(&vma->swap_readahead_info,
                                        SWAP_RA_VAL(addr, win, hits));
                }

                if (readahead) {
                        count_vm_event(SWAP_RA_HIT);
                        if (!vma || !vma_ra)
                                atomic_inc(&swapin_readahead_hits);
                }
        }

        return page;
}

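/*
 * Bookkeeping summary for the lookup above (descriptive): a hit on a page
 * that still has PG_readahead set counts as a readahead hit and raises the
 * SWAP_RA_HIT vm event.  With VMA-based readahead the hit count is folded
 * back into vma->swap_readahead_info (capped at SWAP_RA_HITS_MAX);
 * otherwise the global swapin_readahead_hits counter consumed by
 * swap_cluster_readahead() is bumped instead.
 */
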
/**
 * find_get_incore_page - Find and get a page from the page or swap caches.
 * @mapping: The address_space to search.
 * @index: The page cache index.
 *
 * This differs from find_get_page() in that it will also look for the
 * page in the swap cache.
 *
 * Return: The found page or %NULL.
 */
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
        swp_entry_t swp;
        struct swap_info_struct *si;
        struct page *page = find_get_entry(mapping, index);

        if (!page)
                return page;
        if (!xa_is_value(page))
                return find_subpage(page, index);
        if (!shmem_mapping(mapping))
                return NULL;

        swp = radix_to_swp_entry(page);
        /* Prevent swapoff from happening to us */
        si = get_swap_device(swp);
        if (!si)
                return NULL;
        page = find_get_page(swap_address_space(swp), swp_offset(swp));
        put_swap_device(si);
        return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                        struct vm_area_struct *vma, unsigned long addr,
                        bool *new_page_allocated)
{
        struct swap_info_struct *si;
        struct page *page;
        void *shadow = NULL;

        *new_page_allocated = false;

        for (;;) {
                int err;
                /*
                 * First check the swap cache.  Since this is normally
                 * called after lookup_swap_cache() failed, re-calling
                 * that would confuse statistics.
                 */
                si = get_swap_device(entry);
                if (!si)
                        return NULL;
                page = find_get_page(swap_address_space(entry),
                                     swp_offset(entry));
                put_swap_device(si);
                if (page)
                        return page;

                /*
                 * Just skip read ahead for unused swap slot.
                 * During swap_off when swap_slot_cache is disabled,
                 * we have to handle the race between putting
                 * swap entry in swap cache and marking swap slot
                 * as SWAP_HAS_CACHE.  That's done in a later part of the
                 * code, or else swap_off will be aborted if we return NULL.
                 */
                if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
                        return NULL;

                /*
                 * Get a new page to read into from swap.  Allocate it now,
                 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
                 * cause any racers to loop around until we add it to cache.
                 */
                page = alloc_page_vma(gfp_mask, vma, addr);
                if (!page)
                        return NULL;

                /*
                 * Swap entry may have been freed since our caller observed it.
                 */
                err = swapcache_prepare(entry);
                if (!err)
                        break;

                put_page(page);
                if (err != -EEXIST)
                        return NULL;

                /*
                 * We might race against __delete_from_swap_cache(), and
                 * stumble across a swap_map entry whose SWAP_HAS_CACHE
                 * has not yet been cleared.  Or race against another
                 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
                 * in swap_map, but not yet added its page to swap cache.
                 */
                schedule_timeout_uninterruptible(1);
        }

        /*
         * The swap entry is ours to swap in. Prepare the new page.
         */

        __SetPageLocked(page);
        __SetPageSwapBacked(page);

        /* May fail (-ENOMEM) if XArray node allocation failed. */
        if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) {
                put_swap_page(page, entry);
                goto fail_unlock;
        }

        if (mem_cgroup_charge(page, NULL, gfp_mask)) {
                delete_from_swap_cache(page);
                goto fail_unlock;
        }

        if (shadow)
                workingset_refault(page, shadow);

        /* Caller will initiate read into locked page */
        SetPageWorkingset(page);
        lru_cache_add(page);
        *new_page_allocated = true;
        return page;

fail_unlock:
        unlock_page(page);
        put_page(page);
        return NULL;
}

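/*
 * Caller contract for __read_swap_cache_async() (descriptive): when
 * *new_page_allocated comes back true, the returned page is locked, in the
 * swap cache and on the LRU, but no I/O has been started -- the caller must
 * issue swap_readpage() itself, exactly as read_swap_cache_async() below
 * and the two readahead paths in this file do.
 */
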
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
        bool page_was_allocated;
        struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
                        vma, addr, &page_was_allocated);

        if (page_was_allocated)
                swap_readpage(retpage, do_poll);

        return retpage;
}

static unsigned int __swapin_nr_pages(unsigned long prev_offset,
                                      unsigned long offset,
                                      int hits,
                                      int max_pages,
                                      int prev_win)
{
        unsigned int pages, last_ra;

        /*
         * This heuristic has been found to work well on both sequential and
         * random loads, swapping to hard disk or to SSD: please don't ask
         * what the "+ 2" means, it just happens to work well, that's all.
         */
        pages = hits + 2;
        if (pages == 2) {
                /*
                 * We can have no readahead hits to judge by: but must not get
                 * stuck here forever, so check for an adjacent offset instead
                 * (and don't even bother to check whether swap type is same).
                 */
                if (offset != prev_offset + 1 && offset != prev_offset - 1)
                        pages = 1;
        } else {
                unsigned int roundup = 4;
                while (roundup < pages)
                        roundup <<= 1;
                pages = roundup;
        }

        if (pages > max_pages)
                pages = max_pages;

        /* Don't shrink readahead too fast */
        last_ra = prev_win / 2;
        if (pages < last_ra)
                pages = last_ra;

        return pages;
}

static unsigned long swapin_nr_pages(unsigned long offset)
{
        static unsigned long prev_offset;
        unsigned int hits, pages, max_pages;
        static atomic_t last_readahead_pages;

        max_pages = 1 << READ_ONCE(page_cluster);
        if (max_pages <= 1)
                return 1;

        hits = atomic_xchg(&swapin_readahead_hits, 0);
        pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
                                  max_pages,
                                  atomic_read(&last_readahead_pages));
        if (!hits)
                WRITE_ONCE(prev_offset, offset);
        atomic_set(&last_readahead_pages, pages);

        return pages;
}

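/*
 * Worked example for the heuristic above (illustrative numbers only): with
 * page_cluster == 3, max_pages is 8.  Five readahead hits since the last
 * fault give pages = 5 + 2 = 7, rounded up to the next power of two, 8,
 * then clamped to max_pages.  With no hits and a non-adjacent offset the
 * window collapses to 1, except that it is never allowed to drop below
 * half of the previous window (prev_win / 2).
 */
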
/**
 * swap_cluster_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 */
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
                                struct vm_fault *vmf)
{
        struct page *page;
        unsigned long entry_offset = swp_offset(entry);
        unsigned long offset = entry_offset;
        unsigned long start_offset, end_offset;
        unsigned long mask;
        struct swap_info_struct *si = swp_swap_info(entry);
        struct blk_plug plug;
        bool do_poll = true, page_allocated;
        struct vm_area_struct *vma = vmf->vma;
        unsigned long addr = vmf->address;

        mask = swapin_nr_pages(offset) - 1;
        if (!mask)
                goto skip;

        /* Test swap type to make sure the dereference is safe */
        if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
                struct inode *inode = si->swap_file->f_mapping->host;
                if (inode_read_congested(inode))
                        goto skip;
        }

        do_poll = false;
        /* Read a page_cluster sized and aligned cluster around offset. */
        start_offset = offset & ~mask;
        end_offset = offset | mask;
        if (!start_offset)      /* First page is swap header. */
                start_offset++;
        if (end_offset >= si->max)
                end_offset = si->max - 1;

        blk_start_plug(&plug);
        for (offset = start_offset; offset <= end_offset ; offset++) {
                /* Ok, do the async read-ahead now */
                page = __read_swap_cache_async(
                        swp_entry(swp_type(entry), offset),
                        gfp_mask, vma, addr, &page_allocated);
                if (!page)
                        continue;
                if (page_allocated) {
                        swap_readpage(page, false);
                        if (offset != entry_offset) {
                                SetPageReadahead(page);
                                count_vm_event(SWAP_RA);
                        }
                }
                put_page(page);
        }
        blk_finish_plug(&plug);

        lru_add_drain();        /* Push any new pages onto the LRU now */
skip:
        return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}

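/*
 * Example of the window arithmetic above (illustrative): with a window of
 * 8 pages, mask == 7, so a fault at swap offset 0x123 reads offsets
 * 0x120-0x127 -- an aligned block containing the faulting entry.  Offset 0
 * is always skipped because the first page of the swap area holds the swap
 * header, and the block is truncated at si->max.
 */
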
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
        struct address_space *spaces, *space;
        unsigned int i, nr;

        nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
        spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
        if (!spaces)
                return -ENOMEM;
        for (i = 0; i < nr; i++) {
                space = spaces + i;
                xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
                atomic_set(&space->i_mmap_writable, 0);
                space->a_ops = &swap_aops;
                /* swap cache doesn't use writeback related tags */
                mapping_set_no_writeback_tags(space);
        }
        nr_swapper_spaces[type] = nr;
        swapper_spaces[type] = spaces;

        return 0;
}

void exit_swap_address_space(unsigned int type)
{
        kvfree(swapper_spaces[type]);
        nr_swapper_spaces[type] = 0;
        swapper_spaces[type] = NULL;
}

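/*
 * Design note (descriptive, not from the original comments): the swap
 * cache of one swap device is split into several address_spaces, one per
 * SWAP_ADDRESS_SPACE_PAGES slots, so that concurrent swapins and swapouts
 * contend on multiple i_pages locks rather than a single one.
 * swap_address_space(entry) selects the right piece of swapper_spaces[]
 * from the entry's type and offset.
 */
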
static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
                                     unsigned long faddr,
                                     unsigned long lpfn,
                                     unsigned long rpfn,
                                     unsigned long *start,
                                     unsigned long *end)
{
        *start = max3(lpfn, PFN_DOWN(vma->vm_start),
                      PFN_DOWN(faddr & PMD_MASK));
        *end = min3(rpfn, PFN_DOWN(vma->vm_end),
                    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}

static void swap_ra_info(struct vm_fault *vmf,
                        struct vma_swap_readahead *ra_info)
{
        struct vm_area_struct *vma = vmf->vma;
        unsigned long ra_val;
        swp_entry_t entry;
        unsigned long faddr, pfn, fpfn;
        unsigned long start, end;
        pte_t *pte, *orig_pte;
        unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
        pte_t *tpte;
#endif

        max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
                             SWAP_RA_ORDER_CEILING);
        if (max_win == 1) {
                ra_info->win = 1;
                return;
        }

        faddr = vmf->address;
        orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
        entry = pte_to_swp_entry(*pte);
        if ((unlikely(non_swap_entry(entry)))) {
                pte_unmap(orig_pte);
                return;
        }

        fpfn = PFN_DOWN(faddr);
        ra_val = GET_SWAP_RA_VAL(vma);
        pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
        prev_win = SWAP_RA_WIN(ra_val);
        hits = SWAP_RA_HITS(ra_val);
        ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
                                               max_win, prev_win);
        atomic_long_set(&vma->swap_readahead_info,
                        SWAP_RA_VAL(faddr, win, 0));

        if (win == 1) {
                pte_unmap(orig_pte);
                return;
        }

        /* Copy the PTEs because the page table may be unmapped */
        if (fpfn == pfn + 1)
                swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
        else if (pfn == fpfn + 1)
                swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
                                  &start, &end);
        else {
                left = (win - 1) / 2;
                swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
                                  &start, &end);
        }
        ra_info->nr_pte = end - start;
        ra_info->offset = fpfn - start;
        pte -= ra_info->offset;
#ifdef CONFIG_64BIT
        ra_info->ptes = pte;
#else
        tpte = ra_info->ptes;
        for (pfn = start; pfn != end; pfn++)
                *tpte++ = *pte++;
#endif
        pte_unmap(orig_pte);
}

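/*
 * Window placement in swap_ra_info() (descriptive): if the previous fault
 * address recorded in the VMA was the page just before this one, the whole
 * window is placed ahead of the fault (forward scan); if it was the page
 * just after, the window is placed behind it (backward scan); otherwise
 * the window is centred on the faulting page.  The result is then clamped
 * to the VMA and to the PMD containing the fault, so the PTEs copied for
 * readahead all come from a single page table.
 */
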
/**
 * swap_vma_readahead - swap in pages in hope we need them soon
 * @fentry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read in a few pages whose
 * virtual addresses are around the fault address in the same vma.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 *
 */
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
                                       struct vm_fault *vmf)
{
        struct blk_plug plug;
        struct vm_area_struct *vma = vmf->vma;
        struct page *page;
        pte_t *pte, pentry;
        swp_entry_t entry;
        unsigned int i;
        bool page_allocated;
        struct vma_swap_readahead ra_info = {0,};

        swap_ra_info(vmf, &ra_info);
        if (ra_info.win == 1)
                goto skip;

        blk_start_plug(&plug);
        for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
             i++, pte++) {
                pentry = *pte;
                if (pte_none(pentry))
                        continue;
                if (pte_present(pentry))
                        continue;
                entry = pte_to_swp_entry(pentry);
                if (unlikely(non_swap_entry(entry)))
                        continue;
                page = __read_swap_cache_async(entry, gfp_mask, vma,
                                               vmf->address, &page_allocated);
                if (!page)
                        continue;
                if (page_allocated) {
                        swap_readpage(page, false);
                        if (i != ra_info.offset) {
                                SetPageReadahead(page);
                                count_vm_event(SWAP_RA);
                        }
                }
                put_page(page);
        }
        blk_finish_plug(&plug);
        lru_add_drain();
skip:
        return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
                                     ra_info.win == 1);
}

/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * It's the main entry function for swap readahead. Depending on the
 * configuration, it will read ahead blocks using either cluster-based
 * (i.e., physical disk based) or vma-based (i.e., virtual address based
 * on the faulting address) readahead.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                                struct vm_fault *vmf)
{
        return swap_use_vma_readahead() ?
                        swap_vma_readahead(entry, gfp_mask, vmf) :
                        swap_cluster_readahead(entry, gfp_mask, vmf);
}

#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
                                     struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
                                      struct kobj_attribute *attr,
                                      const char *buf, size_t count)
{
        if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
                enable_vma_readahead = true;
        else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
                enable_vma_readahead = false;
        else
                return -EINVAL;

        return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
        __ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
               vma_ra_enabled_store);

static struct attribute *swap_attrs[] = {
        &vma_ra_enabled_attr.attr,
        NULL,
};

static struct attribute_group swap_attr_group = {
        .attrs = swap_attrs,
};

static int __init swap_init_sysfs(void)
{
        int err;
        struct kobject *swap_kobj;

        swap_kobj = kobject_create_and_add("swap", mm_kobj);
        if (!swap_kobj) {
                pr_err("failed to create swap kobject\n");
                return -ENOMEM;
        }
        err = sysfs_create_group(swap_kobj, &swap_attr_group);
        if (err) {
                pr_err("failed to register swap group\n");
                goto delete_obj;
        }
        return 0;

delete_obj:
        kobject_put(swap_kobj);
        return err;
}
subsys_initcall(swap_init_sysfs);
#endif
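
/*
 * Usage note for the sysfs knob above (path derived from mm_kobj, i.e.
 * /sys/kernel/mm):
 *
 *   # cat /sys/kernel/mm/swap/vma_ra_enabled
 *   true
 *   # echo false > /sys/kernel/mm/swap/vma_ra_enabled
 *
 * With VMA readahead disabled here, or suppressed for rotational devices
 * (see swap_use_vma_readahead()), swapin_readahead() falls back to
 * swap_cluster_readahead().
 */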