// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"
#include "swap.h"

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
3062306a36Sopenharmony_ci */ 3162306a36Sopenharmony_cistatic const struct address_space_operations swap_aops = { 3262306a36Sopenharmony_ci .writepage = swap_writepage, 3362306a36Sopenharmony_ci .dirty_folio = noop_dirty_folio, 3462306a36Sopenharmony_ci#ifdef CONFIG_MIGRATION 3562306a36Sopenharmony_ci .migrate_folio = migrate_folio, 3662306a36Sopenharmony_ci#endif 3762306a36Sopenharmony_ci}; 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_cistruct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; 4062306a36Sopenharmony_cistatic unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; 4162306a36Sopenharmony_cistatic bool enable_vma_readahead __read_mostly = true; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) 4462306a36Sopenharmony_ci#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) 4562306a36Sopenharmony_ci#define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK 4662306a36Sopenharmony_ci#define SWAP_RA_WIN_MASK (~PAGE_MASK & ~SWAP_RA_HITS_MASK) 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#define SWAP_RA_HITS(v) ((v) & SWAP_RA_HITS_MASK) 4962306a36Sopenharmony_ci#define SWAP_RA_WIN(v) (((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT) 5062306a36Sopenharmony_ci#define SWAP_RA_ADDR(v) ((v) & PAGE_MASK) 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#define SWAP_RA_VAL(addr, win, hits) \ 5362306a36Sopenharmony_ci (((addr) & PAGE_MASK) | \ 5462306a36Sopenharmony_ci (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) | \ 5562306a36Sopenharmony_ci ((hits) & SWAP_RA_HITS_MASK)) 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci/* Initial readahead hits is 4 to start up with a small window */ 5862306a36Sopenharmony_ci#define GET_SWAP_RA_VAL(vma) \ 5962306a36Sopenharmony_ci (atomic_long_read(&(vma)->swap_readahead_info) ? 
: 4) 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic atomic_t swapin_readahead_hits = ATOMIC_INIT(4); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_civoid show_swap_cache_info(void) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci printk("%lu pages in swap cache\n", total_swapcache_pages()); 6662306a36Sopenharmony_ci printk("Free swap = %ldkB\n", K(get_nr_swap_pages())); 6762306a36Sopenharmony_ci printk("Total swap = %lukB\n", K(total_swap_pages)); 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_civoid *get_shadow_from_swap_cache(swp_entry_t entry) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci struct address_space *address_space = swap_address_space(entry); 7362306a36Sopenharmony_ci pgoff_t idx = swp_offset(entry); 7462306a36Sopenharmony_ci struct page *page; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci page = xa_load(&address_space->i_pages, idx); 7762306a36Sopenharmony_ci if (xa_is_value(page)) 7862306a36Sopenharmony_ci return page; 7962306a36Sopenharmony_ci return NULL; 8062306a36Sopenharmony_ci} 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci/* 8362306a36Sopenharmony_ci * add_to_swap_cache resembles filemap_add_folio on swapper_space, 8462306a36Sopenharmony_ci * but sets SwapCache flag and private instead of mapping and index. 
8562306a36Sopenharmony_ci */ 8662306a36Sopenharmony_ciint add_to_swap_cache(struct folio *folio, swp_entry_t entry, 8762306a36Sopenharmony_ci gfp_t gfp, void **shadowp) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci struct address_space *address_space = swap_address_space(entry); 9062306a36Sopenharmony_ci pgoff_t idx = swp_offset(entry); 9162306a36Sopenharmony_ci XA_STATE_ORDER(xas, &address_space->i_pages, idx, folio_order(folio)); 9262306a36Sopenharmony_ci unsigned long i, nr = folio_nr_pages(folio); 9362306a36Sopenharmony_ci void *old; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci xas_set_update(&xas, workingset_update_node); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 9862306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); 9962306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci folio_ref_add(folio, nr); 10262306a36Sopenharmony_ci folio_set_swapcache(folio); 10362306a36Sopenharmony_ci folio->swap = entry; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci do { 10662306a36Sopenharmony_ci xas_lock_irq(&xas); 10762306a36Sopenharmony_ci xas_create_range(&xas); 10862306a36Sopenharmony_ci if (xas_error(&xas)) 10962306a36Sopenharmony_ci goto unlock; 11062306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 11162306a36Sopenharmony_ci VM_BUG_ON_FOLIO(xas.xa_index != idx + i, folio); 11262306a36Sopenharmony_ci old = xas_load(&xas); 11362306a36Sopenharmony_ci if (xa_is_value(old)) { 11462306a36Sopenharmony_ci if (shadowp) 11562306a36Sopenharmony_ci *shadowp = old; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci xas_store(&xas, folio); 11862306a36Sopenharmony_ci xas_next(&xas); 11962306a36Sopenharmony_ci } 12062306a36Sopenharmony_ci address_space->nrpages += nr; 12162306a36Sopenharmony_ci __node_stat_mod_folio(folio, NR_FILE_PAGES, nr); 12262306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, 
NR_SWAPCACHE, nr); 12362306a36Sopenharmony_ciunlock: 12462306a36Sopenharmony_ci xas_unlock_irq(&xas); 12562306a36Sopenharmony_ci } while (xas_nomem(&xas, gfp)); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (!xas_error(&xas)) 12862306a36Sopenharmony_ci return 0; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci folio_clear_swapcache(folio); 13162306a36Sopenharmony_ci folio_ref_sub(folio, nr); 13262306a36Sopenharmony_ci return xas_error(&xas); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci/* 13662306a36Sopenharmony_ci * This must be called only on folios that have 13762306a36Sopenharmony_ci * been verified to be in the swap cache. 13862306a36Sopenharmony_ci */ 13962306a36Sopenharmony_civoid __delete_from_swap_cache(struct folio *folio, 14062306a36Sopenharmony_ci swp_entry_t entry, void *shadow) 14162306a36Sopenharmony_ci{ 14262306a36Sopenharmony_ci struct address_space *address_space = swap_address_space(entry); 14362306a36Sopenharmony_ci int i; 14462306a36Sopenharmony_ci long nr = folio_nr_pages(folio); 14562306a36Sopenharmony_ci pgoff_t idx = swp_offset(entry); 14662306a36Sopenharmony_ci XA_STATE(xas, &address_space->i_pages, idx); 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci xas_set_update(&xas, workingset_update_node); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 15162306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio); 15262306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 15562306a36Sopenharmony_ci void *entry = xas_store(&xas, shadow); 15662306a36Sopenharmony_ci VM_BUG_ON_PAGE(entry != folio, entry); 15762306a36Sopenharmony_ci xas_next(&xas); 15862306a36Sopenharmony_ci } 15962306a36Sopenharmony_ci folio->swap.val = 0; 16062306a36Sopenharmony_ci folio_clear_swapcache(folio); 16162306a36Sopenharmony_ci 
address_space->nrpages -= nr; 16262306a36Sopenharmony_ci __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); 16362306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr); 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci/** 16762306a36Sopenharmony_ci * add_to_swap - allocate swap space for a folio 16862306a36Sopenharmony_ci * @folio: folio we want to move to swap 16962306a36Sopenharmony_ci * 17062306a36Sopenharmony_ci * Allocate swap space for the folio and add the folio to the 17162306a36Sopenharmony_ci * swap cache. 17262306a36Sopenharmony_ci * 17362306a36Sopenharmony_ci * Context: Caller needs to hold the folio lock. 17462306a36Sopenharmony_ci * Return: Whether the folio was added to the swap cache. 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_cibool add_to_swap(struct folio *folio) 17762306a36Sopenharmony_ci{ 17862306a36Sopenharmony_ci swp_entry_t entry; 17962306a36Sopenharmony_ci int err; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 18262306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci entry = folio_alloc_swap(folio); 18562306a36Sopenharmony_ci if (!entry.val) 18662306a36Sopenharmony_ci return false; 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci /* 18962306a36Sopenharmony_ci * XArray node allocations from PF_MEMALLOC contexts could 19062306a36Sopenharmony_ci * completely exhaust the page allocator. __GFP_NOMEMALLOC 19162306a36Sopenharmony_ci * stops emergency reserves from being allocated. 19262306a36Sopenharmony_ci * 19362306a36Sopenharmony_ci * TODO: this could cause a theoretical memory reclaim 19462306a36Sopenharmony_ci * deadlock in the swap out path. 19562306a36Sopenharmony_ci */ 19662306a36Sopenharmony_ci /* 19762306a36Sopenharmony_ci * Add it to the swap cache. 
19862306a36Sopenharmony_ci */ 19962306a36Sopenharmony_ci err = add_to_swap_cache(folio, entry, 20062306a36Sopenharmony_ci __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); 20162306a36Sopenharmony_ci if (err) 20262306a36Sopenharmony_ci /* 20362306a36Sopenharmony_ci * add_to_swap_cache() doesn't return -EEXIST, so we can safely 20462306a36Sopenharmony_ci * clear SWAP_HAS_CACHE flag. 20562306a36Sopenharmony_ci */ 20662306a36Sopenharmony_ci goto fail; 20762306a36Sopenharmony_ci /* 20862306a36Sopenharmony_ci * Normally the folio will be dirtied in unmap because its 20962306a36Sopenharmony_ci * pte should be dirty. A special case is MADV_FREE page. The 21062306a36Sopenharmony_ci * page's pte could have dirty bit cleared but the folio's 21162306a36Sopenharmony_ci * SwapBacked flag is still set because clearing the dirty bit 21262306a36Sopenharmony_ci * and SwapBacked flag has no lock protected. For such folio, 21362306a36Sopenharmony_ci * unmap will not set dirty bit for it, so folio reclaim will 21462306a36Sopenharmony_ci * not write the folio out. This can cause data corruption when 21562306a36Sopenharmony_ci * the folio is swapped in later. Always setting the dirty flag 21662306a36Sopenharmony_ci * for the folio solves the problem. 21762306a36Sopenharmony_ci */ 21862306a36Sopenharmony_ci folio_mark_dirty(folio); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci return true; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_cifail: 22362306a36Sopenharmony_ci put_swap_folio(folio, entry); 22462306a36Sopenharmony_ci return false; 22562306a36Sopenharmony_ci} 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci/* 22862306a36Sopenharmony_ci * This must be called only on folios that have 22962306a36Sopenharmony_ci * been verified to be in the swap cache and locked. 23062306a36Sopenharmony_ci * It will never put the folio into the free list, 23162306a36Sopenharmony_ci * the caller has a reference on the folio. 
23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_civoid delete_from_swap_cache(struct folio *folio) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci swp_entry_t entry = folio->swap; 23662306a36Sopenharmony_ci struct address_space *address_space = swap_address_space(entry); 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci xa_lock_irq(&address_space->i_pages); 23962306a36Sopenharmony_ci __delete_from_swap_cache(folio, entry, NULL); 24062306a36Sopenharmony_ci xa_unlock_irq(&address_space->i_pages); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci put_swap_folio(folio, entry); 24362306a36Sopenharmony_ci folio_ref_sub(folio, folio_nr_pages(folio)); 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_civoid clear_shadow_from_swap_cache(int type, unsigned long begin, 24762306a36Sopenharmony_ci unsigned long end) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci unsigned long curr = begin; 25062306a36Sopenharmony_ci void *old; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci for (;;) { 25362306a36Sopenharmony_ci swp_entry_t entry = swp_entry(type, curr); 25462306a36Sopenharmony_ci struct address_space *address_space = swap_address_space(entry); 25562306a36Sopenharmony_ci XA_STATE(xas, &address_space->i_pages, curr); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci xas_set_update(&xas, workingset_update_node); 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci xa_lock_irq(&address_space->i_pages); 26062306a36Sopenharmony_ci xas_for_each(&xas, old, end) { 26162306a36Sopenharmony_ci if (!xa_is_value(old)) 26262306a36Sopenharmony_ci continue; 26362306a36Sopenharmony_ci xas_store(&xas, NULL); 26462306a36Sopenharmony_ci } 26562306a36Sopenharmony_ci xa_unlock_irq(&address_space->i_pages); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci /* search the next swapcache until we meet end */ 26862306a36Sopenharmony_ci curr >>= SWAP_ADDRESS_SPACE_SHIFT; 26962306a36Sopenharmony_ci curr++; 27062306a36Sopenharmony_ci 
curr <<= SWAP_ADDRESS_SPACE_SHIFT; 27162306a36Sopenharmony_ci if (curr > end) 27262306a36Sopenharmony_ci break; 27362306a36Sopenharmony_ci } 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci/* 27762306a36Sopenharmony_ci * If we are the only user, then try to free up the swap cache. 27862306a36Sopenharmony_ci * 27962306a36Sopenharmony_ci * Its ok to check the swapcache flag without the folio lock 28062306a36Sopenharmony_ci * here because we are going to recheck again inside 28162306a36Sopenharmony_ci * folio_free_swap() _with_ the lock. 28262306a36Sopenharmony_ci * - Marcelo 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_civoid free_swap_cache(struct page *page) 28562306a36Sopenharmony_ci{ 28662306a36Sopenharmony_ci struct folio *folio = page_folio(page); 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci if (folio_test_swapcache(folio) && !folio_mapped(folio) && 28962306a36Sopenharmony_ci folio_trylock(folio)) { 29062306a36Sopenharmony_ci folio_free_swap(folio); 29162306a36Sopenharmony_ci folio_unlock(folio); 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci/* 29662306a36Sopenharmony_ci * Perform a free_page(), also freeing any swap cache associated with 29762306a36Sopenharmony_ci * this page if it is the last user of the page. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_civoid free_page_and_swap_cache(struct page *page) 30062306a36Sopenharmony_ci{ 30162306a36Sopenharmony_ci free_swap_cache(page); 30262306a36Sopenharmony_ci if (!is_huge_zero_page(page)) 30362306a36Sopenharmony_ci put_page(page); 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci/* 30762306a36Sopenharmony_ci * Passed an array of pages, drop them all from swapcache and then release 30862306a36Sopenharmony_ci * them. They are removed from the LRU and freed if this is their last use. 
30962306a36Sopenharmony_ci */ 31062306a36Sopenharmony_civoid free_pages_and_swap_cache(struct encoded_page **pages, int nr) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci lru_add_drain(); 31362306a36Sopenharmony_ci for (int i = 0; i < nr; i++) 31462306a36Sopenharmony_ci free_swap_cache(encoded_page_ptr(pages[i])); 31562306a36Sopenharmony_ci release_pages(pages, nr); 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_cistatic inline bool swap_use_vma_readahead(void) 31962306a36Sopenharmony_ci{ 32062306a36Sopenharmony_ci return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap); 32162306a36Sopenharmony_ci} 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci/* 32462306a36Sopenharmony_ci * Lookup a swap entry in the swap cache. A found folio will be returned 32562306a36Sopenharmony_ci * unlocked and with its refcount incremented - we rely on the kernel 32662306a36Sopenharmony_ci * lock getting page table operations atomic even if we drop the folio 32762306a36Sopenharmony_ci * lock before returning. 32862306a36Sopenharmony_ci * 32962306a36Sopenharmony_ci * Caller must lock the swap device or hold a reference to keep it valid. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_cistruct folio *swap_cache_get_folio(swp_entry_t entry, 33262306a36Sopenharmony_ci struct vm_area_struct *vma, unsigned long addr) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci struct folio *folio; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry)); 33762306a36Sopenharmony_ci if (!IS_ERR(folio)) { 33862306a36Sopenharmony_ci bool vma_ra = swap_use_vma_readahead(); 33962306a36Sopenharmony_ci bool readahead; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci /* 34262306a36Sopenharmony_ci * At the moment, we don't support PG_readahead for anon THP 34362306a36Sopenharmony_ci * so let's bail out rather than confusing the readahead stat. 
34462306a36Sopenharmony_ci */ 34562306a36Sopenharmony_ci if (unlikely(folio_test_large(folio))) 34662306a36Sopenharmony_ci return folio; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci readahead = folio_test_clear_readahead(folio); 34962306a36Sopenharmony_ci if (vma && vma_ra) { 35062306a36Sopenharmony_ci unsigned long ra_val; 35162306a36Sopenharmony_ci int win, hits; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci ra_val = GET_SWAP_RA_VAL(vma); 35462306a36Sopenharmony_ci win = SWAP_RA_WIN(ra_val); 35562306a36Sopenharmony_ci hits = SWAP_RA_HITS(ra_val); 35662306a36Sopenharmony_ci if (readahead) 35762306a36Sopenharmony_ci hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); 35862306a36Sopenharmony_ci atomic_long_set(&vma->swap_readahead_info, 35962306a36Sopenharmony_ci SWAP_RA_VAL(addr, win, hits)); 36062306a36Sopenharmony_ci } 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci if (readahead) { 36362306a36Sopenharmony_ci count_vm_event(SWAP_RA_HIT); 36462306a36Sopenharmony_ci if (!vma || !vma_ra) 36562306a36Sopenharmony_ci atomic_inc(&swapin_readahead_hits); 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci } else { 36862306a36Sopenharmony_ci folio = NULL; 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci return folio; 37262306a36Sopenharmony_ci} 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci/** 37562306a36Sopenharmony_ci * filemap_get_incore_folio - Find and get a folio from the page or swap caches. 37662306a36Sopenharmony_ci * @mapping: The address_space to search. 37762306a36Sopenharmony_ci * @index: The page cache index. 37862306a36Sopenharmony_ci * 37962306a36Sopenharmony_ci * This differs from filemap_get_folio() in that it will also look for the 38062306a36Sopenharmony_ci * folio in the swap cache. 38162306a36Sopenharmony_ci * 38262306a36Sopenharmony_ci * Return: The found folio or %NULL. 
38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_cistruct folio *filemap_get_incore_folio(struct address_space *mapping, 38562306a36Sopenharmony_ci pgoff_t index) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci swp_entry_t swp; 38862306a36Sopenharmony_ci struct swap_info_struct *si; 38962306a36Sopenharmony_ci struct folio *folio = filemap_get_entry(mapping, index); 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci if (!folio) 39262306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 39362306a36Sopenharmony_ci if (!xa_is_value(folio)) 39462306a36Sopenharmony_ci return folio; 39562306a36Sopenharmony_ci if (!shmem_mapping(mapping)) 39662306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci swp = radix_to_swp_entry(folio); 39962306a36Sopenharmony_ci /* There might be swapin error entries in shmem mapping. */ 40062306a36Sopenharmony_ci if (non_swap_entry(swp)) 40162306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 40262306a36Sopenharmony_ci /* Prevent swapoff from happening to us */ 40362306a36Sopenharmony_ci si = get_swap_device(swp); 40462306a36Sopenharmony_ci if (!si) 40562306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 40662306a36Sopenharmony_ci index = swp_offset(swp); 40762306a36Sopenharmony_ci folio = filemap_get_folio(swap_address_space(swp), index); 40862306a36Sopenharmony_ci put_swap_device(si); 40962306a36Sopenharmony_ci return folio; 41062306a36Sopenharmony_ci} 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_cistruct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, 41362306a36Sopenharmony_ci struct vm_area_struct *vma, unsigned long addr, 41462306a36Sopenharmony_ci bool *new_page_allocated) 41562306a36Sopenharmony_ci{ 41662306a36Sopenharmony_ci struct swap_info_struct *si; 41762306a36Sopenharmony_ci struct folio *folio; 41862306a36Sopenharmony_ci struct page *page; 41962306a36Sopenharmony_ci void *shadow = NULL; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci *new_page_allocated = 
false; 42262306a36Sopenharmony_ci si = get_swap_device(entry); 42362306a36Sopenharmony_ci if (!si) 42462306a36Sopenharmony_ci return NULL; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci for (;;) { 42762306a36Sopenharmony_ci int err; 42862306a36Sopenharmony_ci /* 42962306a36Sopenharmony_ci * First check the swap cache. Since this is normally 43062306a36Sopenharmony_ci * called after swap_cache_get_folio() failed, re-calling 43162306a36Sopenharmony_ci * that would confuse statistics. 43262306a36Sopenharmony_ci */ 43362306a36Sopenharmony_ci folio = filemap_get_folio(swap_address_space(entry), 43462306a36Sopenharmony_ci swp_offset(entry)); 43562306a36Sopenharmony_ci if (!IS_ERR(folio)) { 43662306a36Sopenharmony_ci page = folio_file_page(folio, swp_offset(entry)); 43762306a36Sopenharmony_ci goto got_page; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci /* 44162306a36Sopenharmony_ci * Just skip read ahead for unused swap slot. 44262306a36Sopenharmony_ci * During swap_off when swap_slot_cache is disabled, 44362306a36Sopenharmony_ci * we have to handle the race between putting 44462306a36Sopenharmony_ci * swap entry in swap cache and marking swap slot 44562306a36Sopenharmony_ci * as SWAP_HAS_CACHE. That's done in later part of code or 44662306a36Sopenharmony_ci * else swap_off will be aborted if we return NULL. 44762306a36Sopenharmony_ci */ 44862306a36Sopenharmony_ci if (!swap_swapcount(si, entry) && swap_slot_cache_enabled) 44962306a36Sopenharmony_ci goto fail_put_swap; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci /* 45262306a36Sopenharmony_ci * Get a new page to read into from swap. Allocate it now, 45362306a36Sopenharmony_ci * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will 45462306a36Sopenharmony_ci * cause any racers to loop around until we add it to cache. 
45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_ci folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false); 45762306a36Sopenharmony_ci if (!folio) 45862306a36Sopenharmony_ci goto fail_put_swap; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci /* 46162306a36Sopenharmony_ci * Swap entry may have been freed since our caller observed it. 46262306a36Sopenharmony_ci */ 46362306a36Sopenharmony_ci err = swapcache_prepare(entry); 46462306a36Sopenharmony_ci if (!err) 46562306a36Sopenharmony_ci break; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci folio_put(folio); 46862306a36Sopenharmony_ci if (err != -EEXIST) 46962306a36Sopenharmony_ci goto fail_put_swap; 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci /* 47262306a36Sopenharmony_ci * We might race against __delete_from_swap_cache(), and 47362306a36Sopenharmony_ci * stumble across a swap_map entry whose SWAP_HAS_CACHE 47462306a36Sopenharmony_ci * has not yet been cleared. Or race against another 47562306a36Sopenharmony_ci * __read_swap_cache_async(), which has set SWAP_HAS_CACHE 47662306a36Sopenharmony_ci * in swap_map, but not yet added its page to swap cache. 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_ci schedule_timeout_uninterruptible(1); 47962306a36Sopenharmony_ci } 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci /* 48262306a36Sopenharmony_ci * The swap entry is ours to swap in. Prepare the new page. 48362306a36Sopenharmony_ci */ 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci __folio_set_locked(folio); 48662306a36Sopenharmony_ci __folio_set_swapbacked(folio); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry)) 48962306a36Sopenharmony_ci goto fail_unlock; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci /* May fail (-ENOMEM) if XArray node allocation failed. 
*/ 49262306a36Sopenharmony_ci if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) 49362306a36Sopenharmony_ci goto fail_unlock; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci mem_cgroup_swapin_uncharge_swap(entry); 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci if (shadow) 49862306a36Sopenharmony_ci workingset_refault(folio, shadow); 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci /* Caller will initiate read into locked folio */ 50162306a36Sopenharmony_ci folio_add_lru(folio); 50262306a36Sopenharmony_ci *new_page_allocated = true; 50362306a36Sopenharmony_ci page = &folio->page; 50462306a36Sopenharmony_cigot_page: 50562306a36Sopenharmony_ci put_swap_device(si); 50662306a36Sopenharmony_ci return page; 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_cifail_unlock: 50962306a36Sopenharmony_ci put_swap_folio(folio, entry); 51062306a36Sopenharmony_ci folio_unlock(folio); 51162306a36Sopenharmony_ci folio_put(folio); 51262306a36Sopenharmony_cifail_put_swap: 51362306a36Sopenharmony_ci put_swap_device(si); 51462306a36Sopenharmony_ci return NULL; 51562306a36Sopenharmony_ci} 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci/* 51862306a36Sopenharmony_ci * Locate a page of swap in physical memory, reserving swap cache space 51962306a36Sopenharmony_ci * and reading the disk if it is not already cached. 52062306a36Sopenharmony_ci * A failure return means that either the page allocation failed or that 52162306a36Sopenharmony_ci * the swap entry is no longer in use. 52262306a36Sopenharmony_ci * 52362306a36Sopenharmony_ci * get/put_swap_device() aren't needed to call this function, because 52462306a36Sopenharmony_ci * __read_swap_cache_async() call them and swap_readpage() holds the 52562306a36Sopenharmony_ci * swap cache folio lock. 
52662306a36Sopenharmony_ci */ 52762306a36Sopenharmony_cistruct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, 52862306a36Sopenharmony_ci struct vm_area_struct *vma, 52962306a36Sopenharmony_ci unsigned long addr, struct swap_iocb **plug) 53062306a36Sopenharmony_ci{ 53162306a36Sopenharmony_ci bool page_was_allocated; 53262306a36Sopenharmony_ci struct page *retpage = __read_swap_cache_async(entry, gfp_mask, 53362306a36Sopenharmony_ci vma, addr, &page_was_allocated); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci if (page_was_allocated) 53662306a36Sopenharmony_ci swap_readpage(retpage, false, plug); 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci return retpage; 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_cistatic unsigned int __swapin_nr_pages(unsigned long prev_offset, 54262306a36Sopenharmony_ci unsigned long offset, 54362306a36Sopenharmony_ci int hits, 54462306a36Sopenharmony_ci int max_pages, 54562306a36Sopenharmony_ci int prev_win) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci unsigned int pages, last_ra; 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci /* 55062306a36Sopenharmony_ci * This heuristic has been found to work well on both sequential and 55162306a36Sopenharmony_ci * random loads, swapping to hard disk or to SSD: please don't ask 55262306a36Sopenharmony_ci * what the "+ 2" means, it just happens to work well, that's all. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_ci pages = hits + 2; 55562306a36Sopenharmony_ci if (pages == 2) { 55662306a36Sopenharmony_ci /* 55762306a36Sopenharmony_ci * We can have no readahead hits to judge by: but must not get 55862306a36Sopenharmony_ci * stuck here forever, so check for an adjacent offset instead 55962306a36Sopenharmony_ci * (and don't even bother to check whether swap type is same). 
56062306a36Sopenharmony_ci */ 56162306a36Sopenharmony_ci if (offset != prev_offset + 1 && offset != prev_offset - 1) 56262306a36Sopenharmony_ci pages = 1; 56362306a36Sopenharmony_ci } else { 56462306a36Sopenharmony_ci unsigned int roundup = 4; 56562306a36Sopenharmony_ci while (roundup < pages) 56662306a36Sopenharmony_ci roundup <<= 1; 56762306a36Sopenharmony_ci pages = roundup; 56862306a36Sopenharmony_ci } 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci if (pages > max_pages) 57162306a36Sopenharmony_ci pages = max_pages; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci /* Don't shrink readahead too fast */ 57462306a36Sopenharmony_ci last_ra = prev_win / 2; 57562306a36Sopenharmony_ci if (pages < last_ra) 57662306a36Sopenharmony_ci pages = last_ra; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci return pages; 57962306a36Sopenharmony_ci} 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_cistatic unsigned long swapin_nr_pages(unsigned long offset) 58262306a36Sopenharmony_ci{ 58362306a36Sopenharmony_ci static unsigned long prev_offset; 58462306a36Sopenharmony_ci unsigned int hits, pages, max_pages; 58562306a36Sopenharmony_ci static atomic_t last_readahead_pages; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci max_pages = 1 << READ_ONCE(page_cluster); 58862306a36Sopenharmony_ci if (max_pages <= 1) 58962306a36Sopenharmony_ci return 1; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci hits = atomic_xchg(&swapin_readahead_hits, 0); 59262306a36Sopenharmony_ci pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits, 59362306a36Sopenharmony_ci max_pages, 59462306a36Sopenharmony_ci atomic_read(&last_readahead_pages)); 59562306a36Sopenharmony_ci if (!hits) 59662306a36Sopenharmony_ci WRITE_ONCE(prev_offset, offset); 59762306a36Sopenharmony_ci atomic_set(&last_readahead_pages, pages); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci return pages; 60062306a36Sopenharmony_ci} 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci/** 
60362306a36Sopenharmony_ci * swap_cluster_readahead - swap in pages in hope we need them soon 60462306a36Sopenharmony_ci * @entry: swap entry of this memory 60562306a36Sopenharmony_ci * @gfp_mask: memory allocation flags 60662306a36Sopenharmony_ci * @vmf: fault information 60762306a36Sopenharmony_ci * 60862306a36Sopenharmony_ci * Returns the struct page for entry and addr, after queueing swapin. 60962306a36Sopenharmony_ci * 61062306a36Sopenharmony_ci * Primitive swap readahead code. We simply read an aligned block of 61162306a36Sopenharmony_ci * (1 << page_cluster) entries in the swap area. This method is chosen 61262306a36Sopenharmony_ci * because it doesn't cost us any seek time. We also make sure to queue 61362306a36Sopenharmony_ci * the 'original' request together with the readahead ones... 61462306a36Sopenharmony_ci * 61562306a36Sopenharmony_ci * This has been extended to use the NUMA policies from the mm triggering 61662306a36Sopenharmony_ci * the readahead. 61762306a36Sopenharmony_ci * 61862306a36Sopenharmony_ci * Caller must hold read mmap_lock if vmf->vma is not NULL. 
61962306a36Sopenharmony_ci */ 62062306a36Sopenharmony_cistruct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, 62162306a36Sopenharmony_ci struct vm_fault *vmf) 62262306a36Sopenharmony_ci{ 62362306a36Sopenharmony_ci struct page *page; 62462306a36Sopenharmony_ci unsigned long entry_offset = swp_offset(entry); 62562306a36Sopenharmony_ci unsigned long offset = entry_offset; 62662306a36Sopenharmony_ci unsigned long start_offset, end_offset; 62762306a36Sopenharmony_ci unsigned long mask; 62862306a36Sopenharmony_ci struct swap_info_struct *si = swp_swap_info(entry); 62962306a36Sopenharmony_ci struct blk_plug plug; 63062306a36Sopenharmony_ci struct swap_iocb *splug = NULL; 63162306a36Sopenharmony_ci bool page_allocated; 63262306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 63362306a36Sopenharmony_ci unsigned long addr = vmf->address; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci mask = swapin_nr_pages(offset) - 1; 63662306a36Sopenharmony_ci if (!mask) 63762306a36Sopenharmony_ci goto skip; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci /* Read a page_cluster sized and aligned cluster around offset. */ 64062306a36Sopenharmony_ci start_offset = offset & ~mask; 64162306a36Sopenharmony_ci end_offset = offset | mask; 64262306a36Sopenharmony_ci if (!start_offset) /* First page is swap header. 
*/ 64362306a36Sopenharmony_ci start_offset++; 64462306a36Sopenharmony_ci if (end_offset >= si->max) 64562306a36Sopenharmony_ci end_offset = si->max - 1; 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci blk_start_plug(&plug); 64862306a36Sopenharmony_ci for (offset = start_offset; offset <= end_offset ; offset++) { 64962306a36Sopenharmony_ci /* Ok, do the async read-ahead now */ 65062306a36Sopenharmony_ci page = __read_swap_cache_async( 65162306a36Sopenharmony_ci swp_entry(swp_type(entry), offset), 65262306a36Sopenharmony_ci gfp_mask, vma, addr, &page_allocated); 65362306a36Sopenharmony_ci if (!page) 65462306a36Sopenharmony_ci continue; 65562306a36Sopenharmony_ci if (page_allocated) { 65662306a36Sopenharmony_ci swap_readpage(page, false, &splug); 65762306a36Sopenharmony_ci if (offset != entry_offset) { 65862306a36Sopenharmony_ci SetPageReadahead(page); 65962306a36Sopenharmony_ci count_vm_event(SWAP_RA); 66062306a36Sopenharmony_ci } 66162306a36Sopenharmony_ci } 66262306a36Sopenharmony_ci put_page(page); 66362306a36Sopenharmony_ci } 66462306a36Sopenharmony_ci blk_finish_plug(&plug); 66562306a36Sopenharmony_ci swap_read_unplug(splug); 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci lru_add_drain(); /* Push any new pages onto the LRU now */ 66862306a36Sopenharmony_ciskip: 66962306a36Sopenharmony_ci /* The page was likely read above, so no need for plugging here */ 67062306a36Sopenharmony_ci return read_swap_cache_async(entry, gfp_mask, vma, addr, NULL); 67162306a36Sopenharmony_ci} 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ciint init_swap_address_space(unsigned int type, unsigned long nr_pages) 67462306a36Sopenharmony_ci{ 67562306a36Sopenharmony_ci struct address_space *spaces, *space; 67662306a36Sopenharmony_ci unsigned int i, nr; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 67962306a36Sopenharmony_ci spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL); 
68062306a36Sopenharmony_ci if (!spaces) 68162306a36Sopenharmony_ci return -ENOMEM; 68262306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 68362306a36Sopenharmony_ci space = spaces + i; 68462306a36Sopenharmony_ci xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ); 68562306a36Sopenharmony_ci atomic_set(&space->i_mmap_writable, 0); 68662306a36Sopenharmony_ci space->a_ops = &swap_aops; 68762306a36Sopenharmony_ci /* swap cache doesn't use writeback related tags */ 68862306a36Sopenharmony_ci mapping_set_no_writeback_tags(space); 68962306a36Sopenharmony_ci } 69062306a36Sopenharmony_ci nr_swapper_spaces[type] = nr; 69162306a36Sopenharmony_ci swapper_spaces[type] = spaces; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci return 0; 69462306a36Sopenharmony_ci} 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_civoid exit_swap_address_space(unsigned int type) 69762306a36Sopenharmony_ci{ 69862306a36Sopenharmony_ci int i; 69962306a36Sopenharmony_ci struct address_space *spaces = swapper_spaces[type]; 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci for (i = 0; i < nr_swapper_spaces[type]; i++) 70262306a36Sopenharmony_ci VM_WARN_ON_ONCE(!mapping_empty(&spaces[i])); 70362306a36Sopenharmony_ci kvfree(spaces); 70462306a36Sopenharmony_ci nr_swapper_spaces[type] = 0; 70562306a36Sopenharmony_ci swapper_spaces[type] = NULL; 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci#define SWAP_RA_ORDER_CEILING 5 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_cistruct vma_swap_readahead { 71162306a36Sopenharmony_ci unsigned short win; 71262306a36Sopenharmony_ci unsigned short offset; 71362306a36Sopenharmony_ci unsigned short nr_pte; 71462306a36Sopenharmony_ci}; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_cistatic void swap_ra_info(struct vm_fault *vmf, 71762306a36Sopenharmony_ci struct vma_swap_readahead *ra_info) 71862306a36Sopenharmony_ci{ 71962306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 72062306a36Sopenharmony_ci unsigned 
long ra_val; 72162306a36Sopenharmony_ci unsigned long faddr, pfn, fpfn, lpfn, rpfn; 72262306a36Sopenharmony_ci unsigned long start, end; 72362306a36Sopenharmony_ci unsigned int max_win, hits, prev_win, win; 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), 72662306a36Sopenharmony_ci SWAP_RA_ORDER_CEILING); 72762306a36Sopenharmony_ci if (max_win == 1) { 72862306a36Sopenharmony_ci ra_info->win = 1; 72962306a36Sopenharmony_ci return; 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci faddr = vmf->address; 73362306a36Sopenharmony_ci fpfn = PFN_DOWN(faddr); 73462306a36Sopenharmony_ci ra_val = GET_SWAP_RA_VAL(vma); 73562306a36Sopenharmony_ci pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val)); 73662306a36Sopenharmony_ci prev_win = SWAP_RA_WIN(ra_val); 73762306a36Sopenharmony_ci hits = SWAP_RA_HITS(ra_val); 73862306a36Sopenharmony_ci ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits, 73962306a36Sopenharmony_ci max_win, prev_win); 74062306a36Sopenharmony_ci atomic_long_set(&vma->swap_readahead_info, 74162306a36Sopenharmony_ci SWAP_RA_VAL(faddr, win, 0)); 74262306a36Sopenharmony_ci if (win == 1) 74362306a36Sopenharmony_ci return; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci if (fpfn == pfn + 1) { 74662306a36Sopenharmony_ci lpfn = fpfn; 74762306a36Sopenharmony_ci rpfn = fpfn + win; 74862306a36Sopenharmony_ci } else if (pfn == fpfn + 1) { 74962306a36Sopenharmony_ci lpfn = fpfn - win + 1; 75062306a36Sopenharmony_ci rpfn = fpfn + 1; 75162306a36Sopenharmony_ci } else { 75262306a36Sopenharmony_ci unsigned int left = (win - 1) / 2; 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci lpfn = fpfn - left; 75562306a36Sopenharmony_ci rpfn = fpfn + win - left; 75662306a36Sopenharmony_ci } 75762306a36Sopenharmony_ci start = max3(lpfn, PFN_DOWN(vma->vm_start), 75862306a36Sopenharmony_ci PFN_DOWN(faddr & PMD_MASK)); 75962306a36Sopenharmony_ci end = min3(rpfn, PFN_DOWN(vma->vm_end), 
76062306a36Sopenharmony_ci PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci ra_info->nr_pte = end - start; 76362306a36Sopenharmony_ci ra_info->offset = fpfn - start; 76462306a36Sopenharmony_ci} 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci/** 76762306a36Sopenharmony_ci * swap_vma_readahead - swap in pages in hope we need them soon 76862306a36Sopenharmony_ci * @fentry: swap entry of this memory 76962306a36Sopenharmony_ci * @gfp_mask: memory allocation flags 77062306a36Sopenharmony_ci * @vmf: fault information 77162306a36Sopenharmony_ci * 77262306a36Sopenharmony_ci * Returns the struct page for entry and addr, after queueing swapin. 77362306a36Sopenharmony_ci * 77462306a36Sopenharmony_ci * Primitive swap readahead code. We simply read in a few pages whose 77562306a36Sopenharmony_ci * virtual addresses are around the fault address in the same vma. 77662306a36Sopenharmony_ci * 77762306a36Sopenharmony_ci * Caller must hold read mmap_lock if vmf->vma is not NULL. 
77862306a36Sopenharmony_ci * 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_cistatic struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, 78162306a36Sopenharmony_ci struct vm_fault *vmf) 78262306a36Sopenharmony_ci{ 78362306a36Sopenharmony_ci struct blk_plug plug; 78462306a36Sopenharmony_ci struct swap_iocb *splug = NULL; 78562306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 78662306a36Sopenharmony_ci struct page *page; 78762306a36Sopenharmony_ci pte_t *pte = NULL, pentry; 78862306a36Sopenharmony_ci unsigned long addr; 78962306a36Sopenharmony_ci swp_entry_t entry; 79062306a36Sopenharmony_ci unsigned int i; 79162306a36Sopenharmony_ci bool page_allocated; 79262306a36Sopenharmony_ci struct vma_swap_readahead ra_info = { 79362306a36Sopenharmony_ci .win = 1, 79462306a36Sopenharmony_ci }; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci swap_ra_info(vmf, &ra_info); 79762306a36Sopenharmony_ci if (ra_info.win == 1) 79862306a36Sopenharmony_ci goto skip; 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci addr = vmf->address - (ra_info.offset * PAGE_SIZE); 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci blk_start_plug(&plug); 80362306a36Sopenharmony_ci for (i = 0; i < ra_info.nr_pte; i++, addr += PAGE_SIZE) { 80462306a36Sopenharmony_ci if (!pte++) { 80562306a36Sopenharmony_ci pte = pte_offset_map(vmf->pmd, addr); 80662306a36Sopenharmony_ci if (!pte) 80762306a36Sopenharmony_ci break; 80862306a36Sopenharmony_ci } 80962306a36Sopenharmony_ci pentry = ptep_get_lockless(pte); 81062306a36Sopenharmony_ci if (!is_swap_pte(pentry)) 81162306a36Sopenharmony_ci continue; 81262306a36Sopenharmony_ci entry = pte_to_swp_entry(pentry); 81362306a36Sopenharmony_ci if (unlikely(non_swap_entry(entry))) 81462306a36Sopenharmony_ci continue; 81562306a36Sopenharmony_ci pte_unmap(pte); 81662306a36Sopenharmony_ci pte = NULL; 81762306a36Sopenharmony_ci page = __read_swap_cache_async(entry, gfp_mask, vma, 81862306a36Sopenharmony_ci addr, &page_allocated); 
81962306a36Sopenharmony_ci if (!page) 82062306a36Sopenharmony_ci continue; 82162306a36Sopenharmony_ci if (page_allocated) { 82262306a36Sopenharmony_ci swap_readpage(page, false, &splug); 82362306a36Sopenharmony_ci if (i != ra_info.offset) { 82462306a36Sopenharmony_ci SetPageReadahead(page); 82562306a36Sopenharmony_ci count_vm_event(SWAP_RA); 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci } 82862306a36Sopenharmony_ci put_page(page); 82962306a36Sopenharmony_ci } 83062306a36Sopenharmony_ci if (pte) 83162306a36Sopenharmony_ci pte_unmap(pte); 83262306a36Sopenharmony_ci blk_finish_plug(&plug); 83362306a36Sopenharmony_ci swap_read_unplug(splug); 83462306a36Sopenharmony_ci lru_add_drain(); 83562306a36Sopenharmony_ciskip: 83662306a36Sopenharmony_ci /* The page was likely read above, so no need for plugging here */ 83762306a36Sopenharmony_ci return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, 83862306a36Sopenharmony_ci NULL); 83962306a36Sopenharmony_ci} 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci/** 84262306a36Sopenharmony_ci * swapin_readahead - swap in pages in hope we need them soon 84362306a36Sopenharmony_ci * @entry: swap entry of this memory 84462306a36Sopenharmony_ci * @gfp_mask: memory allocation flags 84562306a36Sopenharmony_ci * @vmf: fault information 84662306a36Sopenharmony_ci * 84762306a36Sopenharmony_ci * Returns the struct page for entry and addr, after queueing swapin. 84862306a36Sopenharmony_ci * 84962306a36Sopenharmony_ci * It's a main entry function for swap readahead. By the configuration, 85062306a36Sopenharmony_ci * it will read ahead blocks by cluster-based(ie, physical disk based) 85162306a36Sopenharmony_ci * or vma-based(ie, virtual address based on faulty address) readahead. 
85262306a36Sopenharmony_ci */ 85362306a36Sopenharmony_cistruct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, 85462306a36Sopenharmony_ci struct vm_fault *vmf) 85562306a36Sopenharmony_ci{ 85662306a36Sopenharmony_ci return swap_use_vma_readahead() ? 85762306a36Sopenharmony_ci swap_vma_readahead(entry, gfp_mask, vmf) : 85862306a36Sopenharmony_ci swap_cluster_readahead(entry, gfp_mask, vmf); 85962306a36Sopenharmony_ci} 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci#ifdef CONFIG_SYSFS 86262306a36Sopenharmony_cistatic ssize_t vma_ra_enabled_show(struct kobject *kobj, 86362306a36Sopenharmony_ci struct kobj_attribute *attr, char *buf) 86462306a36Sopenharmony_ci{ 86562306a36Sopenharmony_ci return sysfs_emit(buf, "%s\n", 86662306a36Sopenharmony_ci enable_vma_readahead ? "true" : "false"); 86762306a36Sopenharmony_ci} 86862306a36Sopenharmony_cistatic ssize_t vma_ra_enabled_store(struct kobject *kobj, 86962306a36Sopenharmony_ci struct kobj_attribute *attr, 87062306a36Sopenharmony_ci const char *buf, size_t count) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci ssize_t ret; 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci ret = kstrtobool(buf, &enable_vma_readahead); 87562306a36Sopenharmony_ci if (ret) 87662306a36Sopenharmony_ci return ret; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci return count; 87962306a36Sopenharmony_ci} 88062306a36Sopenharmony_cistatic struct kobj_attribute vma_ra_enabled_attr = __ATTR_RW(vma_ra_enabled); 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_cistatic struct attribute *swap_attrs[] = { 88362306a36Sopenharmony_ci &vma_ra_enabled_attr.attr, 88462306a36Sopenharmony_ci NULL, 88562306a36Sopenharmony_ci}; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_cistatic const struct attribute_group swap_attr_group = { 88862306a36Sopenharmony_ci .attrs = swap_attrs, 88962306a36Sopenharmony_ci}; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_cistatic int __init swap_init_sysfs(void) 89262306a36Sopenharmony_ci{ 
89362306a36Sopenharmony_ci int err; 89462306a36Sopenharmony_ci struct kobject *swap_kobj; 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci swap_kobj = kobject_create_and_add("swap", mm_kobj); 89762306a36Sopenharmony_ci if (!swap_kobj) { 89862306a36Sopenharmony_ci pr_err("failed to create swap kobject\n"); 89962306a36Sopenharmony_ci return -ENOMEM; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci err = sysfs_create_group(swap_kobj, &swap_attr_group); 90262306a36Sopenharmony_ci if (err) { 90362306a36Sopenharmony_ci pr_err("failed to register swap group\n"); 90462306a36Sopenharmony_ci goto delete_obj; 90562306a36Sopenharmony_ci } 90662306a36Sopenharmony_ci return 0; 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_cidelete_obj: 90962306a36Sopenharmony_ci kobject_put(swap_kobj); 91062306a36Sopenharmony_ci return err; 91162306a36Sopenharmony_ci} 91262306a36Sopenharmony_cisubsys_initcall(swap_init_sysfs); 91362306a36Sopenharmony_ci#endif 914