// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
static bool enable_vma_readahead __read_mostly = true;

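/*
 * Per-VMA swap readahead state is packed into the single long
 * vma->swap_readahead_info: the readahead hit count lives in the low
 * SWAP_RA_WIN_SHIFT bits, the current window size in the next
 * SWAP_RA_WIN_SHIFT bits, and the page-aligned address of the last
 * swapin fault in the remaining high bits.
 */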
#define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
#define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)				\
	(((addr) & PAGE_MASK) |					\
	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
	 ((hits) & SWAP_RA_HITS_MASK))

/* Initial readahead hits is 4 to start up with a small window */
#define GET_SWAP_RA_VAL(vma)					\
	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)

#define INC_CACHE_INFO(x)	data_race(swap_cache_info.x++)
#define ADD_CACHE_INFO(x, nr)	data_race(swap_cache_info.x += (nr))

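/*
 * Racy, best-effort swap cache statistics, reported by
 * show_swap_cache_info(). Updates go through data_race() because exact
 * counts are not required.
 */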
static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;

unsigned long total_swapcache_pages(void)
{
	unsigned int i, j, nr;
	unsigned long ret = 0;
	struct address_space *spaces;
	struct swap_info_struct *si;

	for (i = 0; i < MAX_SWAPFILES; i++) {
		swp_entry_t entry = swp_entry(i, 1);

		/* Avoid a get_swap_device() warning for a bad swap entry */
		if (!swp_swap_info(entry))
			continue;
		/* Prevent swapoff from freeing swapper_spaces */
		si = get_swap_device(entry);
		if (!si)
			continue;
		nr = nr_swapper_spaces[i];
		spaces = swapper_spaces[i];
		for (j = 0; j < nr; j++)
			ret += spaces[j].nrpages;
		put_swap_device(si);
	}
	return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

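/*
 * Return the workingset shadow entry cached for @entry's slot, or NULL if
 * the slot is empty or holds a real page.
 */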
void *get_shadow_from_swap_cache(swp_entry_t entry)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	struct page *page;

	page = find_get_entry(address_space, idx);
	if (xa_is_value(page))
		return page;
	if (page)
		put_page(page);
	return NULL;
}

/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry,
			gfp_t gfp, void **shadowp)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
	unsigned long i, nr = thp_nr_pages(page);
	void *old;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	do {
		unsigned long nr_shadows = 0;

		xas_lock_irq(&xas);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
		for (i = 0; i < nr; i++) {
			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
			old = xas_load(&xas);
			if (xa_is_value(old)) {
				nr_shadows++;
				if (shadowp)
					*shadowp = old;
			}
			set_page_private(page + i, entry.val + i);
			xas_store(&xas, page);
			xas_next(&xas);
		}
		address_space->nrexceptional -= nr_shadows;
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (!xas_error(&xas))
		return 0;

	ClearPageSwapCache(page);
	page_ref_sub(page, nr);
	return xas_error(&xas);
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page,
			swp_entry_t entry, void *shadow)
{
	struct address_space *address_space = swap_address_space(entry);
	int i, nr = thp_nr_pages(page);
	pgoff_t idx = swp_offset(entry);
	XA_STATE(xas, &address_space->i_pages, idx);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	for (i = 0; i < nr; i++) {
		void *entry = xas_store(&xas, shadow);
		VM_BUG_ON_PAGE(entry != page, entry);
		set_page_private(page + i, 0);
		xas_next(&xas);
	}
	ClearPageSwapCache(page);
	if (shadow)
		address_space->nrexceptional += nr;
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

	/*
	 * XArray node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should be
	 * dirty. A special case is an MADV_FREE page. The page's pte could
	 * have the dirty bit cleared while the page's SwapBacked bit is still
	 * set, because clearing the dirty bit and the SwapBacked bit is not
	 * protected by a lock. For such a page, unmap will not set the dirty
	 * bit, so page reclaim will not write the page out. This can cause
	 * data corruption when the page is swapped in later. Always setting
	 * the dirty bit for the page solves the problem.
	 */
	set_page_dirty(page);

	return 1;

fail:
	put_swap_page(page, entry);
	return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry = { .val = page_private(page) };
	struct address_space *address_space = swap_address_space(entry);

	xa_lock_irq(&address_space->i_pages);
	__delete_from_swap_cache(page, entry, NULL);
	xa_unlock_irq(&address_space->i_pages);

	put_swap_page(page, entry);
	page_ref_sub(page, thp_nr_pages(page));
}

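/*
 * Remove any workingset shadow entries stored for swap offsets
 * [begin, end] of swap device @type. Each per-chunk swap address space
 * is locked and scanned separately.
 */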
void clear_shadow_from_swap_cache(int type, unsigned long begin,
				unsigned long end)
{
	unsigned long curr = begin;
	void *old;

	for (;;) {
		unsigned long nr_shadows = 0;
		swp_entry_t entry = swp_entry(type, curr);
		struct address_space *address_space = swap_address_space(entry);
		XA_STATE(xas, &address_space->i_pages, curr);

		xa_lock_irq(&address_space->i_pages);
		xas_for_each(&xas, old, end) {
			if (!xa_is_value(old))
				continue;
			xas_store(&xas, NULL);
			nr_shadows++;
		}
		address_space->nrexceptional -= nr_shadows;
		xa_unlock_irq(&address_space->i_pages);

		/* advance to the next swap address space until we pass end */
		curr >>= SWAP_ADDRESS_SPACE_SHIFT;
		curr++;
		curr <<= SWAP_ADDRESS_SPACE_SHIFT;
		if (curr > end)
			break;
	}
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's OK to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr);
}

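/*
 * VMA based readahead is used only when it is enabled via sysfs
 * (/sys/kernel/mm/swap/vma_ra_enabled) and no rotational swap device is
 * in use; otherwise swapin falls back to cluster based readahead.
 */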
static inline bool swap_use_vma_readahead(void)
{
	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

/*
 * Look up a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	struct swap_info_struct *si;

	si = get_swap_device(entry);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	put_swap_device(si);

	INC_CACHE_INFO(find_total);
	if (page) {
		bool vma_ra = swap_use_vma_readahead();
		bool readahead;

		INC_CACHE_INFO(find_success);
		/*
		 * At the moment, we don't support PG_readahead for anon THP
		 * so let's bail out rather than confusing the readahead stat.
		 */
		if (unlikely(PageTransCompound(page)))
			return page;

		readahead = TestClearPageReadahead(page);
		if (vma && vma_ra) {
			unsigned long ra_val;
			int win, hits;

			ra_val = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_val);
			hits = SWAP_RA_HITS(ra_val);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}

		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma || !vma_ra)
				atomic_inc(&swapin_readahead_hits);
		}
	}

	return page;
}

/**
 * find_get_incore_page - Find and get a page from the page or swap caches.
 * @mapping: The address_space to search.
 * @index: The page cache index.
 *
 * This differs from find_get_page() in that it will also look for the
 * page in the swap cache.
 *
 * Return: The found page or %NULL.
 */
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
	swp_entry_t swp;
	struct swap_info_struct *si;
	struct page *page = find_get_entry(mapping, index);

	if (!page)
		return page;
	if (!xa_is_value(page))
		return find_subpage(page, index);
	if (!shmem_mapping(mapping))
		return NULL;

	swp = radix_to_swp_entry(page);
	/* Prevent swapoff from happening to us */
	si = get_swap_device(swp);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(swp), swp_offset(swp));
	put_swap_device(si);
	return page;
}

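/*
 * Look up or create the swap cache page for @entry. The swap cache is
 * rechecked, a page is allocated, and swapcache_prepare() is used to claim
 * the slot; losing a race means looping until the winner's page shows up.
 * On success the new page is added to the swap cache and LRU, returned
 * locked, and *new_page_allocated is set so the caller knows it must
 * start the read.
 */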
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct swap_info_struct *si;
	struct page *page;
	void *shadow = NULL;

	*new_page_allocated = false;

	for (;;) {
		int err;
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		si = get_swap_device(entry);
		if (!si)
			return NULL;
		page = find_get_page(swap_address_space(entry),
				     swp_offset(entry));
		put_swap_device(si);
		if (page)
			return page;

		/*
		 * Just skip readahead for an unused swap slot.
		 * During swap_off, when swap_slot_cache is disabled, we have
		 * to handle the race between putting the swap entry in the
		 * swap cache and marking the swap slot as SWAP_HAS_CACHE.
		 * That's done in a later part of the code, or else swap_off
		 * will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			return NULL;

		/*
		 * Get a new page to read into from swap.  Allocate it now,
		 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
		 * cause any racers to loop around until we add it to cache.
		 */
		page = alloc_page_vma(gfp_mask, vma, addr);
		if (!page)
			return NULL;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (!err)
			break;

		put_page(page);
		if (err != -EEXIST)
			return NULL;

		/*
		 * We might race against __delete_from_swap_cache(), and
		 * stumble across a swap_map entry whose SWAP_HAS_CACHE
		 * has not yet been cleared.  Or race against another
		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
		 * in swap_map, but not yet added its page to swap cache.
		 */
		schedule_timeout_uninterruptible(1);
	}

	/*
	 * The swap entry is ours to swap in. Prepare the new page.
	 */

	__SetPageLocked(page);
	__SetPageSwapBacked(page);

	/* May fail (-ENOMEM) if XArray node allocation failed. */
	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) {
		put_swap_page(page, entry);
		goto fail_unlock;
	}

	if (mem_cgroup_charge(page, NULL, gfp_mask)) {
		delete_from_swap_cache(page);
		goto fail_unlock;
	}

	if (shadow)
		workingset_refault(page, shadow);

	/* Caller will initiate read into locked page */
	SetPageWorkingset(page);
	lru_cache_add(page);
	*new_page_allocated = true;
	return page;

fail_unlock:
	unlock_page(page);
	put_page(page);
	return NULL;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
	bool page_was_allocated;
	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
			vma, addr, &page_was_allocated);

	if (page_was_allocated)
		swap_readpage(retpage, do_poll);

	return retpage;
}

static unsigned int __swapin_nr_pages(unsigned long prev_offset,
				      unsigned long offset,
				      int hits,
				      int max_pages,
				      int prev_win)
{
	unsigned int pages, last_ra;

	/*
	 * This heuristic has been found to work well on both sequential and
	 * random loads, swapping to hard disk or to SSD: please don't ask
	 * what the "+ 2" means, it just happens to work well, that's all.
	 */
	pages = hits + 2;
	if (pages == 2) {
		/*
		 * We can have no readahead hits to judge by: but must not get
		 * stuck here forever, so check for an adjacent offset instead
		 * (and don't even bother to check whether swap type is same).
		 */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
	} else {
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Don't shrink readahead too fast */
	last_ra = prev_win / 2;
	if (pages < last_ra)
		pages = last_ra;

	return pages;
}

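/*
 * Size the cluster readahead window from the number of readahead hits seen
 * since the last swapin fault, capped at 1 << page_cluster. The global hit
 * counter is reset here, and the chosen window is remembered so that it
 * does not shrink too quickly on the next fault.
 */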
static unsigned long swapin_nr_pages(unsigned long offset)
{
	static unsigned long prev_offset;
	unsigned int hits, pages, max_pages;
	static atomic_t last_readahead_pages;

	max_pages = 1 << READ_ONCE(page_cluster);
	if (max_pages <= 1)
		return 1;

	hits = atomic_xchg(&swapin_readahead_hits, 0);
	pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
				  max_pages,
				  atomic_read(&last_readahead_pages));
	if (!hits)
		WRITE_ONCE(prev_offset, offset);
	atomic_set(&last_readahead_pages, pages);

	return pages;
}

/**
 * swap_cluster_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 */
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct swap_info_struct *si = swp_swap_info(entry);
	struct blk_plug plug;
	bool do_poll = true, page_allocated;
	struct vm_area_struct *vma = vmf->vma;
	unsigned long addr = vmf->address;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	/* Test swap type to make sure the dereference is safe */
	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
		struct inode *inode = si->swap_file->f_mapping->host;
		if (inode_read_congested(inode))
			goto skip;
	}

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;
	if (end_offset >= si->max)
		end_offset = si->max - 1;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
			gfp_mask, vma, addr, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (offset != entry_offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}

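/*
 * The swap cache for each swap device is split into one address_space per
 * SWAP_ADDRESS_SPACE_PAGES worth of swap slots, spreading xarray lock
 * contention across several locks instead of a single one.
 */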
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces, *space;
	unsigned int i, nr;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;
	for (i = 0; i < nr; i++) {
		space = spaces + i;
		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
	}
	nr_swapper_spaces[type] = nr;
	swapper_spaces[type] = spaces;

	return 0;
}

void exit_swap_address_space(unsigned int type)
{
	kvfree(swapper_spaces[type]);
	nr_swapper_spaces[type] = 0;
	swapper_spaces[type] = NULL;
}

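/*
 * Clamp the readahead pfn range [lpfn, rpfn) so that it stays within the
 * VMA and within the PMD (i.e. the single page table) containing the
 * faulting address.
 */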
static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
				     unsigned long faddr,
				     unsigned long lpfn,
				     unsigned long rpfn,
				     unsigned long *start,
				     unsigned long *end)
{
	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
		      PFN_DOWN(faddr & PMD_MASK));
	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}

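/*
 * Compute the VMA readahead window around the faulting address, biased
 * towards the direction of a detected sequential access, and record the
 * PTEs covering it so that swap_vma_readahead() can walk them after the
 * page table mapping has been dropped.
 */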
static void swap_ra_info(struct vm_fault *vmf,
			struct vma_swap_readahead *ra_info)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long ra_val;
	swp_entry_t entry;
	unsigned long faddr, pfn, fpfn;
	unsigned long start, end;
	pte_t *pte, *orig_pte;
	unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
	pte_t *tpte;
#endif

	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
			     SWAP_RA_ORDER_CEILING);
	if (max_win == 1) {
		ra_info->win = 1;
		return;
	}

	faddr = vmf->address;
	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
	entry = pte_to_swp_entry(*pte);
	if ((unlikely(non_swap_entry(entry)))) {
		pte_unmap(orig_pte);
		return;
	}

	fpfn = PFN_DOWN(faddr);
	ra_val = GET_SWAP_RA_VAL(vma);
	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
	prev_win = SWAP_RA_WIN(ra_val);
	hits = SWAP_RA_HITS(ra_val);
	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
					       max_win, prev_win);
	atomic_long_set(&vma->swap_readahead_info,
			SWAP_RA_VAL(faddr, win, 0));

	if (win == 1) {
		pte_unmap(orig_pte);
		return;
	}

	/* Copy the PTEs because the page table may be unmapped */
	if (fpfn == pfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
	else if (pfn == fpfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
				  &start, &end);
	else {
		left = (win - 1) / 2;
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
				  &start, &end);
	}
	ra_info->nr_pte = end - start;
	ra_info->offset = fpfn - start;
	pte -= ra_info->offset;
#ifdef CONFIG_64BIT
	ra_info->ptes = pte;
#else
	tpte = ra_info->ptes;
	for (pfn = start; pfn != end; pfn++)
		*tpte++ = *pte++;
#endif
	pte_unmap(orig_pte);
}

/**
 * swap_vma_readahead - swap in pages in hope we need them soon
 * @fentry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read in a few pages whose
 * virtual addresses are around the fault address in the same VMA.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 */
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
				       struct vm_fault *vmf)
{
	struct blk_plug plug;
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	pte_t *pte, pentry;
	swp_entry_t entry;
	unsigned int i;
	bool page_allocated;
	struct vma_swap_readahead ra_info = {0,};

	swap_ra_info(vmf, &ra_info);
	if (ra_info.win == 1)
		goto skip;

	blk_start_plug(&plug);
	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
	     i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		page = __read_swap_cache_async(entry, gfp_mask, vma,
					       vmf->address, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (i != ra_info.offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();
skip:
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
				     ra_info.win == 1);
}

/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * It's the main entry point for swap readahead. Depending on the
 * configuration, it reads ahead blocks using cluster-based (i.e. physical
 * disk based) or VMA-based (i.e. based on virtual addresses around the
 * fault address) readahead.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	return swap_use_vma_readahead() ?
			swap_vma_readahead(entry, gfp_mask, vmf) :
			swap_cluster_readahead(entry, gfp_mask, vmf);
}

#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		enable_vma_readahead = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		enable_vma_readahead = false;
	else
		return -EINVAL;

	return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
	       vma_ra_enabled_store);

static struct attribute *swap_attrs[] = {
	&vma_ra_enabled_attr.attr,
	NULL,
};

static struct attribute_group swap_attr_group = {
	.attrs = swap_attrs,
};

static int __init swap_init_sysfs(void)
{
	int err;
	struct kobject *swap_kobj;

	swap_kobj = kobject_create_and_add("swap", mm_kobj);
	if (!swap_kobj) {
		pr_err("failed to create swap kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(swap_kobj, &swap_attr_group);
	if (err) {
		pr_err("failed to register swap group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(swap_kobj);
	return err;
}
subsys_initcall(swap_init_sysfs);
#endif