162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/mm/page_io.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Swap reorganised 29.12.95, 862306a36Sopenharmony_ci * Asynchronous swapping added 30.12.95. Stephen Tweedie 962306a36Sopenharmony_ci * Removed race in async swapping. 14.4.1996. Bruno Haible 1062306a36Sopenharmony_ci * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie 1162306a36Sopenharmony_ci * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/mm.h> 1562306a36Sopenharmony_ci#include <linux/kernel_stat.h> 1662306a36Sopenharmony_ci#include <linux/gfp.h> 1762306a36Sopenharmony_ci#include <linux/pagemap.h> 1862306a36Sopenharmony_ci#include <linux/swap.h> 1962306a36Sopenharmony_ci#include <linux/bio.h> 2062306a36Sopenharmony_ci#include <linux/swapops.h> 2162306a36Sopenharmony_ci#include <linux/writeback.h> 2262306a36Sopenharmony_ci#include <linux/blkdev.h> 2362306a36Sopenharmony_ci#include <linux/psi.h> 2462306a36Sopenharmony_ci#include <linux/uio.h> 2562306a36Sopenharmony_ci#include <linux/sched/task.h> 2662306a36Sopenharmony_ci#include <linux/delayacct.h> 2762306a36Sopenharmony_ci#include <linux/zswap.h> 2862306a36Sopenharmony_ci#include "swap.h" 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_cistatic void __end_swap_bio_write(struct bio *bio) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci struct folio *folio = bio_first_folio_all(bio); 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci if (bio->bi_status) { 3562306a36Sopenharmony_ci /* 3662306a36Sopenharmony_ci * We failed to write the page out to swap-space. 3762306a36Sopenharmony_ci * Re-dirty the page in order to avoid it being reclaimed. 3862306a36Sopenharmony_ci * Also print a dire warning that things will go BAD (tm) 3962306a36Sopenharmony_ci * very quickly. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Also clear PG_reclaim to avoid folio_rotate_reclaimable() 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ci folio_mark_dirty(folio); 4462306a36Sopenharmony_ci pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", 4562306a36Sopenharmony_ci MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), 4662306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 4762306a36Sopenharmony_ci folio_clear_reclaim(folio); 4862306a36Sopenharmony_ci } 4962306a36Sopenharmony_ci folio_end_writeback(folio); 5062306a36Sopenharmony_ci} 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic void end_swap_bio_write(struct bio *bio) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci __end_swap_bio_write(bio); 5562306a36Sopenharmony_ci bio_put(bio); 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic void __end_swap_bio_read(struct bio *bio) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci struct folio *folio = bio_first_folio_all(bio); 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (bio->bi_status) { 6362306a36Sopenharmony_ci pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n", 6462306a36Sopenharmony_ci MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), 6562306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 6662306a36Sopenharmony_ci } else { 6762306a36Sopenharmony_ci folio_mark_uptodate(folio); 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci folio_unlock(folio); 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistatic void end_swap_bio_read(struct bio *bio) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci __end_swap_bio_read(bio); 7562306a36Sopenharmony_ci bio_put(bio); 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ciint generic_swapfile_activate(struct swap_info_struct *sis, 7962306a36Sopenharmony_ci struct file *swap_file, 8062306a36Sopenharmony_ci sector_t *span) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci struct address_space *mapping = swap_file->f_mapping; 8362306a36Sopenharmony_ci struct inode *inode = mapping->host; 8462306a36Sopenharmony_ci unsigned blocks_per_page; 8562306a36Sopenharmony_ci unsigned long page_no; 8662306a36Sopenharmony_ci unsigned blkbits; 8762306a36Sopenharmony_ci sector_t probe_block; 8862306a36Sopenharmony_ci sector_t last_block; 8962306a36Sopenharmony_ci sector_t lowest_block = -1; 9062306a36Sopenharmony_ci sector_t highest_block = 0; 9162306a36Sopenharmony_ci int nr_extents = 0; 9262306a36Sopenharmony_ci int ret; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci blkbits = inode->i_blkbits; 9562306a36Sopenharmony_ci blocks_per_page = PAGE_SIZE >> blkbits; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci /* 9862306a36Sopenharmony_ci * Map all the blocks into the extent tree. This code doesn't try 9962306a36Sopenharmony_ci * to be very smart. 10062306a36Sopenharmony_ci */ 10162306a36Sopenharmony_ci probe_block = 0; 10262306a36Sopenharmony_ci page_no = 0; 10362306a36Sopenharmony_ci last_block = i_size_read(inode) >> blkbits; 10462306a36Sopenharmony_ci while ((probe_block + blocks_per_page) <= last_block && 10562306a36Sopenharmony_ci page_no < sis->max) { 10662306a36Sopenharmony_ci unsigned block_in_page; 10762306a36Sopenharmony_ci sector_t first_block; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci cond_resched(); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci first_block = probe_block; 11262306a36Sopenharmony_ci ret = bmap(inode, &first_block); 11362306a36Sopenharmony_ci if (ret || !first_block) 11462306a36Sopenharmony_ci goto bad_bmap; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci /* 11762306a36Sopenharmony_ci * It must be PAGE_SIZE aligned on-disk 11862306a36Sopenharmony_ci */ 11962306a36Sopenharmony_ci if (first_block & (blocks_per_page - 1)) { 12062306a36Sopenharmony_ci probe_block++; 12162306a36Sopenharmony_ci goto reprobe; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci for (block_in_page = 1; block_in_page < blocks_per_page; 12562306a36Sopenharmony_ci block_in_page++) { 12662306a36Sopenharmony_ci sector_t block; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci block = probe_block + block_in_page; 12962306a36Sopenharmony_ci ret = bmap(inode, &block); 13062306a36Sopenharmony_ci if (ret || !block) 13162306a36Sopenharmony_ci goto bad_bmap; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci if (block != first_block + block_in_page) { 13462306a36Sopenharmony_ci /* Discontiguity */ 13562306a36Sopenharmony_ci probe_block++; 13662306a36Sopenharmony_ci goto reprobe; 13762306a36Sopenharmony_ci } 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci first_block >>= (PAGE_SHIFT - blkbits); 14162306a36Sopenharmony_ci if (page_no) { /* exclude the header page */ 14262306a36Sopenharmony_ci if (first_block < lowest_block) 14362306a36Sopenharmony_ci lowest_block = first_block; 14462306a36Sopenharmony_ci if (first_block > highest_block) 14562306a36Sopenharmony_ci highest_block = first_block; 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci /* 14962306a36Sopenharmony_ci * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks 15062306a36Sopenharmony_ci */ 15162306a36Sopenharmony_ci ret = add_swap_extent(sis, page_no, 1, first_block); 15262306a36Sopenharmony_ci if (ret < 0) 15362306a36Sopenharmony_ci goto out; 15462306a36Sopenharmony_ci nr_extents += ret; 15562306a36Sopenharmony_ci page_no++; 15662306a36Sopenharmony_ci probe_block += blocks_per_page; 15762306a36Sopenharmony_cireprobe: 15862306a36Sopenharmony_ci continue; 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci ret = nr_extents; 16162306a36Sopenharmony_ci *span = 1 + highest_block - lowest_block; 16262306a36Sopenharmony_ci if (page_no == 0) 16362306a36Sopenharmony_ci page_no = 1; /* force Empty message */ 16462306a36Sopenharmony_ci sis->max = page_no; 16562306a36Sopenharmony_ci sis->pages = page_no - 1; 16662306a36Sopenharmony_ci sis->highest_bit = page_no - 1; 16762306a36Sopenharmony_ciout: 16862306a36Sopenharmony_ci return ret; 16962306a36Sopenharmony_cibad_bmap: 17062306a36Sopenharmony_ci pr_err("swapon: swapfile has holes\n"); 17162306a36Sopenharmony_ci ret = -EINVAL; 17262306a36Sopenharmony_ci goto out; 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci/* 17662306a36Sopenharmony_ci * We may have stale swap cache pages in memory: notice 17762306a36Sopenharmony_ci * them here and get rid of the unnecessary final write. 17862306a36Sopenharmony_ci */ 17962306a36Sopenharmony_ciint swap_writepage(struct page *page, struct writeback_control *wbc) 18062306a36Sopenharmony_ci{ 18162306a36Sopenharmony_ci struct folio *folio = page_folio(page); 18262306a36Sopenharmony_ci int ret; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci if (folio_free_swap(folio)) { 18562306a36Sopenharmony_ci folio_unlock(folio); 18662306a36Sopenharmony_ci return 0; 18762306a36Sopenharmony_ci } 18862306a36Sopenharmony_ci /* 18962306a36Sopenharmony_ci * Arch code may have to preserve more data than just the page 19062306a36Sopenharmony_ci * contents, e.g. memory tags. 19162306a36Sopenharmony_ci */ 19262306a36Sopenharmony_ci ret = arch_prepare_to_swap(&folio->page); 19362306a36Sopenharmony_ci if (ret) { 19462306a36Sopenharmony_ci folio_mark_dirty(folio); 19562306a36Sopenharmony_ci folio_unlock(folio); 19662306a36Sopenharmony_ci return ret; 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci if (zswap_store(folio)) { 19962306a36Sopenharmony_ci folio_start_writeback(folio); 20062306a36Sopenharmony_ci folio_unlock(folio); 20162306a36Sopenharmony_ci folio_end_writeback(folio); 20262306a36Sopenharmony_ci return 0; 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci __swap_writepage(&folio->page, wbc); 20562306a36Sopenharmony_ci return 0; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cistatic inline void count_swpout_vm_event(struct folio *folio) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 21162306a36Sopenharmony_ci if (unlikely(folio_test_pmd_mappable(folio))) 21262306a36Sopenharmony_ci count_vm_event(THP_SWPOUT); 21362306a36Sopenharmony_ci#endif 21462306a36Sopenharmony_ci count_vm_events(PSWPOUT, folio_nr_pages(folio)); 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) 21862306a36Sopenharmony_cistatic void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci struct cgroup_subsys_state *css; 22162306a36Sopenharmony_ci struct mem_cgroup *memcg; 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci memcg = folio_memcg(folio); 22462306a36Sopenharmony_ci if (!memcg) 22562306a36Sopenharmony_ci return; 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci rcu_read_lock(); 22862306a36Sopenharmony_ci css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys); 22962306a36Sopenharmony_ci bio_associate_blkg_from_css(bio, css); 23062306a36Sopenharmony_ci rcu_read_unlock(); 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci#else 23362306a36Sopenharmony_ci#define bio_associate_blkg_from_page(bio, folio) do { } while (0) 23462306a36Sopenharmony_ci#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */ 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_cistruct swap_iocb { 23762306a36Sopenharmony_ci struct kiocb iocb; 23862306a36Sopenharmony_ci struct bio_vec bvec[SWAP_CLUSTER_MAX]; 23962306a36Sopenharmony_ci int pages; 24062306a36Sopenharmony_ci int len; 24162306a36Sopenharmony_ci}; 24262306a36Sopenharmony_cistatic mempool_t *sio_pool; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ciint sio_pool_init(void) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci if (!sio_pool) { 24762306a36Sopenharmony_ci mempool_t *pool = mempool_create_kmalloc_pool( 24862306a36Sopenharmony_ci SWAP_CLUSTER_MAX, sizeof(struct swap_iocb)); 24962306a36Sopenharmony_ci if (cmpxchg(&sio_pool, NULL, pool)) 25062306a36Sopenharmony_ci mempool_destroy(pool); 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci if (!sio_pool) 25362306a36Sopenharmony_ci return -ENOMEM; 25462306a36Sopenharmony_ci return 0; 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_cistatic void sio_write_complete(struct kiocb *iocb, long ret) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb); 26062306a36Sopenharmony_ci struct page *page = sio->bvec[0].bv_page; 26162306a36Sopenharmony_ci int p; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci if (ret != sio->len) { 26462306a36Sopenharmony_ci /* 26562306a36Sopenharmony_ci * In the case of swap-over-nfs, this can be a 26662306a36Sopenharmony_ci * temporary failure if the system has limited 26762306a36Sopenharmony_ci * memory for allocating transmit buffers. 26862306a36Sopenharmony_ci * Mark the page dirty and avoid 26962306a36Sopenharmony_ci * folio_rotate_reclaimable but rate-limit the 27062306a36Sopenharmony_ci * messages but do not flag PageError like 27162306a36Sopenharmony_ci * the normal direct-to-bio case as it could 27262306a36Sopenharmony_ci * be temporary. 27362306a36Sopenharmony_ci */ 27462306a36Sopenharmony_ci pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n", 27562306a36Sopenharmony_ci ret, page_file_offset(page)); 27662306a36Sopenharmony_ci for (p = 0; p < sio->pages; p++) { 27762306a36Sopenharmony_ci page = sio->bvec[p].bv_page; 27862306a36Sopenharmony_ci set_page_dirty(page); 27962306a36Sopenharmony_ci ClearPageReclaim(page); 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci } else { 28262306a36Sopenharmony_ci for (p = 0; p < sio->pages; p++) 28362306a36Sopenharmony_ci count_swpout_vm_event(page_folio(sio->bvec[p].bv_page)); 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci for (p = 0; p < sio->pages; p++) 28762306a36Sopenharmony_ci end_page_writeback(sio->bvec[p].bv_page); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci mempool_free(sio, sio_pool); 29062306a36Sopenharmony_ci} 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_cistatic void swap_writepage_fs(struct page *page, struct writeback_control *wbc) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci struct swap_iocb *sio = NULL; 29562306a36Sopenharmony_ci struct swap_info_struct *sis = page_swap_info(page); 29662306a36Sopenharmony_ci struct file *swap_file = sis->swap_file; 29762306a36Sopenharmony_ci loff_t pos = page_file_offset(page); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci set_page_writeback(page); 30062306a36Sopenharmony_ci unlock_page(page); 30162306a36Sopenharmony_ci if (wbc->swap_plug) 30262306a36Sopenharmony_ci sio = *wbc->swap_plug; 30362306a36Sopenharmony_ci if (sio) { 30462306a36Sopenharmony_ci if (sio->iocb.ki_filp != swap_file || 30562306a36Sopenharmony_ci sio->iocb.ki_pos + sio->len != pos) { 30662306a36Sopenharmony_ci swap_write_unplug(sio); 30762306a36Sopenharmony_ci sio = NULL; 30862306a36Sopenharmony_ci } 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci if (!sio) { 31162306a36Sopenharmony_ci sio = mempool_alloc(sio_pool, GFP_NOIO); 31262306a36Sopenharmony_ci init_sync_kiocb(&sio->iocb, swap_file); 31362306a36Sopenharmony_ci sio->iocb.ki_complete = sio_write_complete; 31462306a36Sopenharmony_ci sio->iocb.ki_pos = pos; 31562306a36Sopenharmony_ci sio->pages = 0; 31662306a36Sopenharmony_ci sio->len = 0; 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0); 31962306a36Sopenharmony_ci sio->len += thp_size(page); 32062306a36Sopenharmony_ci sio->pages += 1; 32162306a36Sopenharmony_ci if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) { 32262306a36Sopenharmony_ci swap_write_unplug(sio); 32362306a36Sopenharmony_ci sio = NULL; 32462306a36Sopenharmony_ci } 32562306a36Sopenharmony_ci if (wbc->swap_plug) 32662306a36Sopenharmony_ci *wbc->swap_plug = sio; 32762306a36Sopenharmony_ci} 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_cistatic void swap_writepage_bdev_sync(struct page *page, 33062306a36Sopenharmony_ci struct writeback_control *wbc, struct swap_info_struct *sis) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci struct bio_vec bv; 33362306a36Sopenharmony_ci struct bio bio; 33462306a36Sopenharmony_ci struct folio *folio = page_folio(page); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci bio_init(&bio, sis->bdev, &bv, 1, 33762306a36Sopenharmony_ci REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc)); 33862306a36Sopenharmony_ci bio.bi_iter.bi_sector = swap_page_sector(page); 33962306a36Sopenharmony_ci __bio_add_page(&bio, page, thp_size(page), 0); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci bio_associate_blkg_from_page(&bio, folio); 34262306a36Sopenharmony_ci count_swpout_vm_event(folio); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci folio_start_writeback(folio); 34562306a36Sopenharmony_ci folio_unlock(folio); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci submit_bio_wait(&bio); 34862306a36Sopenharmony_ci __end_swap_bio_write(&bio); 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic void swap_writepage_bdev_async(struct page *page, 35262306a36Sopenharmony_ci struct writeback_control *wbc, struct swap_info_struct *sis) 35362306a36Sopenharmony_ci{ 35462306a36Sopenharmony_ci struct bio *bio; 35562306a36Sopenharmony_ci struct folio *folio = page_folio(page); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci bio = bio_alloc(sis->bdev, 1, 35862306a36Sopenharmony_ci REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc), 35962306a36Sopenharmony_ci GFP_NOIO); 36062306a36Sopenharmony_ci bio->bi_iter.bi_sector = swap_page_sector(page); 36162306a36Sopenharmony_ci bio->bi_end_io = end_swap_bio_write; 36262306a36Sopenharmony_ci __bio_add_page(bio, page, thp_size(page), 0); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci bio_associate_blkg_from_page(bio, folio); 36562306a36Sopenharmony_ci count_swpout_vm_event(folio); 36662306a36Sopenharmony_ci folio_start_writeback(folio); 36762306a36Sopenharmony_ci folio_unlock(folio); 36862306a36Sopenharmony_ci submit_bio(bio); 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_civoid __swap_writepage(struct page *page, struct writeback_control *wbc) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci struct swap_info_struct *sis = page_swap_info(page); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci VM_BUG_ON_PAGE(!PageSwapCache(page), page); 37662306a36Sopenharmony_ci /* 37762306a36Sopenharmony_ci * ->flags can be updated non-atomicially (scan_swap_map_slots), 37862306a36Sopenharmony_ci * but that will never affect SWP_FS_OPS, so the data_race 37962306a36Sopenharmony_ci * is safe. 38062306a36Sopenharmony_ci */ 38162306a36Sopenharmony_ci if (data_race(sis->flags & SWP_FS_OPS)) 38262306a36Sopenharmony_ci swap_writepage_fs(page, wbc); 38362306a36Sopenharmony_ci else if (sis->flags & SWP_SYNCHRONOUS_IO) 38462306a36Sopenharmony_ci swap_writepage_bdev_sync(page, wbc, sis); 38562306a36Sopenharmony_ci else 38662306a36Sopenharmony_ci swap_writepage_bdev_async(page, wbc, sis); 38762306a36Sopenharmony_ci} 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_civoid swap_write_unplug(struct swap_iocb *sio) 39062306a36Sopenharmony_ci{ 39162306a36Sopenharmony_ci struct iov_iter from; 39262306a36Sopenharmony_ci struct address_space *mapping = sio->iocb.ki_filp->f_mapping; 39362306a36Sopenharmony_ci int ret; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len); 39662306a36Sopenharmony_ci ret = mapping->a_ops->swap_rw(&sio->iocb, &from); 39762306a36Sopenharmony_ci if (ret != -EIOCBQUEUED) 39862306a36Sopenharmony_ci sio_write_complete(&sio->iocb, ret); 39962306a36Sopenharmony_ci} 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_cistatic void sio_read_complete(struct kiocb *iocb, long ret) 40262306a36Sopenharmony_ci{ 40362306a36Sopenharmony_ci struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb); 40462306a36Sopenharmony_ci int p; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci if (ret == sio->len) { 40762306a36Sopenharmony_ci for (p = 0; p < sio->pages; p++) { 40862306a36Sopenharmony_ci struct folio *folio = page_folio(sio->bvec[p].bv_page); 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci folio_mark_uptodate(folio); 41162306a36Sopenharmony_ci folio_unlock(folio); 41262306a36Sopenharmony_ci } 41362306a36Sopenharmony_ci count_vm_events(PSWPIN, sio->pages); 41462306a36Sopenharmony_ci } else { 41562306a36Sopenharmony_ci for (p = 0; p < sio->pages; p++) { 41662306a36Sopenharmony_ci struct folio *folio = page_folio(sio->bvec[p].bv_page); 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci folio_unlock(folio); 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci pr_alert_ratelimited("Read-error on swap-device\n"); 42162306a36Sopenharmony_ci } 42262306a36Sopenharmony_ci mempool_free(sio, sio_pool); 42362306a36Sopenharmony_ci} 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_cistatic void swap_readpage_fs(struct page *page, 42662306a36Sopenharmony_ci struct swap_iocb **plug) 42762306a36Sopenharmony_ci{ 42862306a36Sopenharmony_ci struct swap_info_struct *sis = page_swap_info(page); 42962306a36Sopenharmony_ci struct swap_iocb *sio = NULL; 43062306a36Sopenharmony_ci loff_t pos = page_file_offset(page); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci if (plug) 43362306a36Sopenharmony_ci sio = *plug; 43462306a36Sopenharmony_ci if (sio) { 43562306a36Sopenharmony_ci if (sio->iocb.ki_filp != sis->swap_file || 43662306a36Sopenharmony_ci sio->iocb.ki_pos + sio->len != pos) { 43762306a36Sopenharmony_ci swap_read_unplug(sio); 43862306a36Sopenharmony_ci sio = NULL; 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci } 44162306a36Sopenharmony_ci if (!sio) { 44262306a36Sopenharmony_ci sio = mempool_alloc(sio_pool, GFP_KERNEL); 44362306a36Sopenharmony_ci init_sync_kiocb(&sio->iocb, sis->swap_file); 44462306a36Sopenharmony_ci sio->iocb.ki_pos = pos; 44562306a36Sopenharmony_ci sio->iocb.ki_complete = sio_read_complete; 44662306a36Sopenharmony_ci sio->pages = 0; 44762306a36Sopenharmony_ci sio->len = 0; 44862306a36Sopenharmony_ci } 44962306a36Sopenharmony_ci bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0); 45062306a36Sopenharmony_ci sio->len += thp_size(page); 45162306a36Sopenharmony_ci sio->pages += 1; 45262306a36Sopenharmony_ci if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) { 45362306a36Sopenharmony_ci swap_read_unplug(sio); 45462306a36Sopenharmony_ci sio = NULL; 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci if (plug) 45762306a36Sopenharmony_ci *plug = sio; 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic void swap_readpage_bdev_sync(struct page *page, 46162306a36Sopenharmony_ci struct swap_info_struct *sis) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci struct bio_vec bv; 46462306a36Sopenharmony_ci struct bio bio; 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ); 46762306a36Sopenharmony_ci bio.bi_iter.bi_sector = swap_page_sector(page); 46862306a36Sopenharmony_ci __bio_add_page(&bio, page, thp_size(page), 0); 46962306a36Sopenharmony_ci /* 47062306a36Sopenharmony_ci * Keep this task valid during swap readpage because the oom killer may 47162306a36Sopenharmony_ci * attempt to access it in the page fault retry time check. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci get_task_struct(current); 47462306a36Sopenharmony_ci count_vm_event(PSWPIN); 47562306a36Sopenharmony_ci submit_bio_wait(&bio); 47662306a36Sopenharmony_ci __end_swap_bio_read(&bio); 47762306a36Sopenharmony_ci put_task_struct(current); 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_cistatic void swap_readpage_bdev_async(struct page *page, 48162306a36Sopenharmony_ci struct swap_info_struct *sis) 48262306a36Sopenharmony_ci{ 48362306a36Sopenharmony_ci struct bio *bio; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL); 48662306a36Sopenharmony_ci bio->bi_iter.bi_sector = swap_page_sector(page); 48762306a36Sopenharmony_ci bio->bi_end_io = end_swap_bio_read; 48862306a36Sopenharmony_ci __bio_add_page(bio, page, thp_size(page), 0); 48962306a36Sopenharmony_ci count_vm_event(PSWPIN); 49062306a36Sopenharmony_ci submit_bio(bio); 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_civoid swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci struct folio *folio = page_folio(page); 49662306a36Sopenharmony_ci struct swap_info_struct *sis = page_swap_info(page); 49762306a36Sopenharmony_ci bool workingset = folio_test_workingset(folio); 49862306a36Sopenharmony_ci unsigned long pflags; 49962306a36Sopenharmony_ci bool in_thrashing; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio); 50262306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 50362306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci /* 50662306a36Sopenharmony_ci * Count submission time as memory stall and delay. When the device 50762306a36Sopenharmony_ci * is congested, or the submitting cgroup IO-throttled, submission 50862306a36Sopenharmony_ci * can be a significant part of overall IO time. 50962306a36Sopenharmony_ci */ 51062306a36Sopenharmony_ci if (workingset) { 51162306a36Sopenharmony_ci delayacct_thrashing_start(&in_thrashing); 51262306a36Sopenharmony_ci psi_memstall_enter(&pflags); 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci delayacct_swapin_start(); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci if (zswap_load(folio)) { 51762306a36Sopenharmony_ci folio_mark_uptodate(folio); 51862306a36Sopenharmony_ci folio_unlock(folio); 51962306a36Sopenharmony_ci } else if (data_race(sis->flags & SWP_FS_OPS)) { 52062306a36Sopenharmony_ci swap_readpage_fs(page, plug); 52162306a36Sopenharmony_ci } else if (synchronous || (sis->flags & SWP_SYNCHRONOUS_IO)) { 52262306a36Sopenharmony_ci swap_readpage_bdev_sync(page, sis); 52362306a36Sopenharmony_ci } else { 52462306a36Sopenharmony_ci swap_readpage_bdev_async(page, sis); 52562306a36Sopenharmony_ci } 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci if (workingset) { 52862306a36Sopenharmony_ci delayacct_thrashing_end(&in_thrashing); 52962306a36Sopenharmony_ci psi_memstall_leave(&pflags); 53062306a36Sopenharmony_ci } 53162306a36Sopenharmony_ci delayacct_swapin_end(); 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_civoid __swap_read_unplug(struct swap_iocb *sio) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct iov_iter from; 53762306a36Sopenharmony_ci struct address_space *mapping = sio->iocb.ki_filp->f_mapping; 53862306a36Sopenharmony_ci int ret; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len); 54162306a36Sopenharmony_ci ret = mapping->a_ops->swap_rw(&sio->iocb, &from); 54262306a36Sopenharmony_ci if (ret != -EIOCBQUEUED) 54362306a36Sopenharmony_ci sio_read_complete(&sio->iocb, ret); 54462306a36Sopenharmony_ci} 545