18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * z3fold.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Author: Vitaly Wool <vitaly.wool@konsulko.com> 68c2ecf20Sopenharmony_ci * Copyright (C) 2016, Sony Mobile Communications Inc. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * This implementation is based on zbud written by Seth Jennings. 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * z3fold is an special purpose allocator for storing compressed pages. It 118c2ecf20Sopenharmony_ci * can store up to three compressed pages per page which improves the 128c2ecf20Sopenharmony_ci * compression ratio of zbud while retaining its main concepts (e. g. always 138c2ecf20Sopenharmony_ci * storing an integral number of objects per page) and simplicity. 148c2ecf20Sopenharmony_ci * It still has simple and deterministic reclaim properties that make it 158c2ecf20Sopenharmony_ci * preferable to a higher density approach (with no requirement on integral 168c2ecf20Sopenharmony_ci * number of object per page) when reclaim is used. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * As in zbud, pages are divided into "chunks". The size of the chunks is 198c2ecf20Sopenharmony_ci * fixed at compile time and is determined by NCHUNKS_ORDER below. 208c2ecf20Sopenharmony_ci * 218c2ecf20Sopenharmony_ci * z3fold doesn't export any API and is meant to be used via zpool API. 228c2ecf20Sopenharmony_ci */ 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#include <linux/atomic.h> 278c2ecf20Sopenharmony_ci#include <linux/sched.h> 288c2ecf20Sopenharmony_ci#include <linux/cpumask.h> 298c2ecf20Sopenharmony_ci#include <linux/list.h> 308c2ecf20Sopenharmony_ci#include <linux/mm.h> 318c2ecf20Sopenharmony_ci#include <linux/module.h> 328c2ecf20Sopenharmony_ci#include <linux/page-flags.h> 338c2ecf20Sopenharmony_ci#include <linux/migrate.h> 348c2ecf20Sopenharmony_ci#include <linux/node.h> 358c2ecf20Sopenharmony_ci#include <linux/compaction.h> 368c2ecf20Sopenharmony_ci#include <linux/percpu.h> 378c2ecf20Sopenharmony_ci#include <linux/mount.h> 388c2ecf20Sopenharmony_ci#include <linux/pseudo_fs.h> 398c2ecf20Sopenharmony_ci#include <linux/fs.h> 408c2ecf20Sopenharmony_ci#include <linux/preempt.h> 418c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 428c2ecf20Sopenharmony_ci#include <linux/slab.h> 438c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 448c2ecf20Sopenharmony_ci#include <linux/zpool.h> 458c2ecf20Sopenharmony_ci#include <linux/magic.h> 468c2ecf20Sopenharmony_ci#include <linux/kmemleak.h> 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/* 498c2ecf20Sopenharmony_ci * NCHUNKS_ORDER determines the internal allocation granularity, effectively 508c2ecf20Sopenharmony_ci * adjusting internal fragmentation. It also determines the number of 518c2ecf20Sopenharmony_ci * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the 528c2ecf20Sopenharmony_ci * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks 538c2ecf20Sopenharmony_ci * in the beginning of an allocated page are occupied by z3fold header, so 548c2ecf20Sopenharmony_ci * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), 558c2ecf20Sopenharmony_ci * which shows the max number of free chunks in z3fold page, also there will 568c2ecf20Sopenharmony_ci * be 63, or 62, respectively, freelists per pool. 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_ci#define NCHUNKS_ORDER 6 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) 618c2ecf20Sopenharmony_ci#define CHUNK_SIZE (1 << CHUNK_SHIFT) 628c2ecf20Sopenharmony_ci#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) 638c2ecf20Sopenharmony_ci#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) 648c2ecf20Sopenharmony_ci#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) 658c2ecf20Sopenharmony_ci#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci#define BUDDY_MASK (0x3) 688c2ecf20Sopenharmony_ci#define BUDDY_SHIFT 2 698c2ecf20Sopenharmony_ci#define SLOTS_ALIGN (0x40) 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci/***************** 728c2ecf20Sopenharmony_ci * Structures 738c2ecf20Sopenharmony_ci*****************/ 748c2ecf20Sopenharmony_cistruct z3fold_pool; 758c2ecf20Sopenharmony_cistruct z3fold_ops { 768c2ecf20Sopenharmony_ci int (*evict)(struct z3fold_pool *pool, unsigned long handle); 778c2ecf20Sopenharmony_ci}; 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_cienum buddy { 808c2ecf20Sopenharmony_ci HEADLESS = 0, 818c2ecf20Sopenharmony_ci FIRST, 828c2ecf20Sopenharmony_ci MIDDLE, 838c2ecf20Sopenharmony_ci LAST, 848c2ecf20Sopenharmony_ci BUDDIES_MAX = LAST 858c2ecf20Sopenharmony_ci}; 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistruct z3fold_buddy_slots { 888c2ecf20Sopenharmony_ci /* 898c2ecf20Sopenharmony_ci * we are using BUDDY_MASK in handle_to_buddy etc. so there should 908c2ecf20Sopenharmony_ci * be enough slots to hold all possible variants 918c2ecf20Sopenharmony_ci */ 928c2ecf20Sopenharmony_ci unsigned long slot[BUDDY_MASK + 1]; 938c2ecf20Sopenharmony_ci unsigned long pool; /* back link */ 948c2ecf20Sopenharmony_ci rwlock_t lock; 958c2ecf20Sopenharmony_ci}; 968c2ecf20Sopenharmony_ci#define HANDLE_FLAG_MASK (0x03) 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci/* 998c2ecf20Sopenharmony_ci * struct z3fold_header - z3fold page metadata occupying first chunks of each 1008c2ecf20Sopenharmony_ci * z3fold page, except for HEADLESS pages 1018c2ecf20Sopenharmony_ci * @buddy: links the z3fold page into the relevant list in the 1028c2ecf20Sopenharmony_ci * pool 1038c2ecf20Sopenharmony_ci * @page_lock: per-page lock 1048c2ecf20Sopenharmony_ci * @refcount: reference count for the z3fold page 1058c2ecf20Sopenharmony_ci * @work: work_struct for page layout optimization 1068c2ecf20Sopenharmony_ci * @slots: pointer to the structure holding buddy slots 1078c2ecf20Sopenharmony_ci * @pool: pointer to the containing pool 1088c2ecf20Sopenharmony_ci * @cpu: CPU which this page "belongs" to 1098c2ecf20Sopenharmony_ci * @first_chunks: the size of the first buddy in chunks, 0 if free 1108c2ecf20Sopenharmony_ci * @middle_chunks: the size of the middle buddy in chunks, 0 if free 1118c2ecf20Sopenharmony_ci * @last_chunks: the size of the last buddy in chunks, 0 if free 1128c2ecf20Sopenharmony_ci * @first_num: the starting number (for the first handle) 1138c2ecf20Sopenharmony_ci * @mapped_count: the number of objects currently mapped 1148c2ecf20Sopenharmony_ci */ 1158c2ecf20Sopenharmony_cistruct z3fold_header { 1168c2ecf20Sopenharmony_ci struct list_head buddy; 1178c2ecf20Sopenharmony_ci spinlock_t page_lock; 1188c2ecf20Sopenharmony_ci struct kref refcount; 1198c2ecf20Sopenharmony_ci struct work_struct work; 1208c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots; 1218c2ecf20Sopenharmony_ci struct z3fold_pool *pool; 1228c2ecf20Sopenharmony_ci short cpu; 1238c2ecf20Sopenharmony_ci unsigned short first_chunks; 1248c2ecf20Sopenharmony_ci unsigned short middle_chunks; 1258c2ecf20Sopenharmony_ci unsigned short last_chunks; 1268c2ecf20Sopenharmony_ci unsigned short start_middle; 1278c2ecf20Sopenharmony_ci unsigned short first_num:2; 1288c2ecf20Sopenharmony_ci unsigned short mapped_count:2; 1298c2ecf20Sopenharmony_ci unsigned short foreign_handles:2; 1308c2ecf20Sopenharmony_ci}; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci/** 1338c2ecf20Sopenharmony_ci * struct z3fold_pool - stores metadata for each z3fold pool 1348c2ecf20Sopenharmony_ci * @name: pool name 1358c2ecf20Sopenharmony_ci * @lock: protects pool unbuddied/lru lists 1368c2ecf20Sopenharmony_ci * @stale_lock: protects pool stale page list 1378c2ecf20Sopenharmony_ci * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 1388c2ecf20Sopenharmony_ci * buddies; the list each z3fold page is added to depends on 1398c2ecf20Sopenharmony_ci * the size of its free region. 1408c2ecf20Sopenharmony_ci * @lru: list tracking the z3fold pages in LRU order by most recently 1418c2ecf20Sopenharmony_ci * added buddy. 1428c2ecf20Sopenharmony_ci * @stale: list of pages marked for freeing 1438c2ecf20Sopenharmony_ci * @pages_nr: number of z3fold pages in the pool. 1448c2ecf20Sopenharmony_ci * @c_handle: cache for z3fold_buddy_slots allocation 1458c2ecf20Sopenharmony_ci * @ops: pointer to a structure of user defined operations specified at 1468c2ecf20Sopenharmony_ci * pool creation time. 1478c2ecf20Sopenharmony_ci * @compact_wq: workqueue for page layout background optimization 1488c2ecf20Sopenharmony_ci * @release_wq: workqueue for safe page release 1498c2ecf20Sopenharmony_ci * @work: work_struct for safe page release 1508c2ecf20Sopenharmony_ci * @inode: inode for z3fold pseudo filesystem 1518c2ecf20Sopenharmony_ci * 1528c2ecf20Sopenharmony_ci * This structure is allocated at pool creation time and maintains metadata 1538c2ecf20Sopenharmony_ci * pertaining to a particular z3fold pool. 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_cistruct z3fold_pool { 1568c2ecf20Sopenharmony_ci const char *name; 1578c2ecf20Sopenharmony_ci spinlock_t lock; 1588c2ecf20Sopenharmony_ci spinlock_t stale_lock; 1598c2ecf20Sopenharmony_ci struct list_head *unbuddied; 1608c2ecf20Sopenharmony_ci struct list_head lru; 1618c2ecf20Sopenharmony_ci struct list_head stale; 1628c2ecf20Sopenharmony_ci atomic64_t pages_nr; 1638c2ecf20Sopenharmony_ci struct kmem_cache *c_handle; 1648c2ecf20Sopenharmony_ci const struct z3fold_ops *ops; 1658c2ecf20Sopenharmony_ci struct zpool *zpool; 1668c2ecf20Sopenharmony_ci const struct zpool_ops *zpool_ops; 1678c2ecf20Sopenharmony_ci struct workqueue_struct *compact_wq; 1688c2ecf20Sopenharmony_ci struct workqueue_struct *release_wq; 1698c2ecf20Sopenharmony_ci struct work_struct work; 1708c2ecf20Sopenharmony_ci struct inode *inode; 1718c2ecf20Sopenharmony_ci}; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci/* 1748c2ecf20Sopenharmony_ci * Internal z3fold page flags 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_cienum z3fold_page_flags { 1778c2ecf20Sopenharmony_ci PAGE_HEADLESS = 0, 1788c2ecf20Sopenharmony_ci MIDDLE_CHUNK_MAPPED, 1798c2ecf20Sopenharmony_ci NEEDS_COMPACTING, 1808c2ecf20Sopenharmony_ci PAGE_STALE, 1818c2ecf20Sopenharmony_ci PAGE_CLAIMED, /* by either reclaim or free */ 1828c2ecf20Sopenharmony_ci}; 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci/* 1858c2ecf20Sopenharmony_ci * handle flags, go under HANDLE_FLAG_MASK 1868c2ecf20Sopenharmony_ci */ 1878c2ecf20Sopenharmony_cienum z3fold_handle_flags { 1888c2ecf20Sopenharmony_ci HANDLES_NOFREE = 0, 1898c2ecf20Sopenharmony_ci}; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci/* 1928c2ecf20Sopenharmony_ci * Forward declarations 1938c2ecf20Sopenharmony_ci */ 1948c2ecf20Sopenharmony_cistatic struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool); 1958c2ecf20Sopenharmony_cistatic void compact_page_work(struct work_struct *w); 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci/***************** 1988c2ecf20Sopenharmony_ci * Helpers 1998c2ecf20Sopenharmony_ci*****************/ 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci/* Converts an allocation size in bytes to size in z3fold chunks */ 2028c2ecf20Sopenharmony_cistatic int size_to_chunks(size_t size) 2038c2ecf20Sopenharmony_ci{ 2048c2ecf20Sopenharmony_ci return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 2058c2ecf20Sopenharmony_ci} 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci#define for_each_unbuddied_list(_iter, _begin) \ 2088c2ecf20Sopenharmony_ci for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistatic inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, 2118c2ecf20Sopenharmony_ci gfp_t gfp) 2128c2ecf20Sopenharmony_ci{ 2138c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci slots = kmem_cache_zalloc(pool->c_handle, 2168c2ecf20Sopenharmony_ci (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE))); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (slots) { 2198c2ecf20Sopenharmony_ci /* It will be freed separately in free_handle(). */ 2208c2ecf20Sopenharmony_ci kmemleak_not_leak(slots); 2218c2ecf20Sopenharmony_ci slots->pool = (unsigned long)pool; 2228c2ecf20Sopenharmony_ci rwlock_init(&slots->lock); 2238c2ecf20Sopenharmony_ci } 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci return slots; 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_cistatic inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK); 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle) 2348c2ecf20Sopenharmony_ci{ 2358c2ecf20Sopenharmony_ci return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1)); 2368c2ecf20Sopenharmony_ci} 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci/* Lock a z3fold page */ 2398c2ecf20Sopenharmony_cistatic inline void z3fold_page_lock(struct z3fold_header *zhdr) 2408c2ecf20Sopenharmony_ci{ 2418c2ecf20Sopenharmony_ci spin_lock(&zhdr->page_lock); 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci/* Try to lock a z3fold page */ 2458c2ecf20Sopenharmony_cistatic inline int z3fold_page_trylock(struct z3fold_header *zhdr) 2468c2ecf20Sopenharmony_ci{ 2478c2ecf20Sopenharmony_ci return spin_trylock(&zhdr->page_lock); 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci/* Unlock a z3fold page */ 2518c2ecf20Sopenharmony_cistatic inline void z3fold_page_unlock(struct z3fold_header *zhdr) 2528c2ecf20Sopenharmony_ci{ 2538c2ecf20Sopenharmony_ci spin_unlock(&zhdr->page_lock); 2548c2ecf20Sopenharmony_ci} 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_cistatic inline struct z3fold_header *__get_z3fold_header(unsigned long handle, 2588c2ecf20Sopenharmony_ci bool lock) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots; 2618c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 2628c2ecf20Sopenharmony_ci int locked = 0; 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci if (!(handle & (1 << PAGE_HEADLESS))) { 2658c2ecf20Sopenharmony_ci slots = handle_to_slots(handle); 2668c2ecf20Sopenharmony_ci do { 2678c2ecf20Sopenharmony_ci unsigned long addr; 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci read_lock(&slots->lock); 2708c2ecf20Sopenharmony_ci addr = *(unsigned long *)handle; 2718c2ecf20Sopenharmony_ci zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 2728c2ecf20Sopenharmony_ci if (lock) 2738c2ecf20Sopenharmony_ci locked = z3fold_page_trylock(zhdr); 2748c2ecf20Sopenharmony_ci read_unlock(&slots->lock); 2758c2ecf20Sopenharmony_ci if (locked) 2768c2ecf20Sopenharmony_ci break; 2778c2ecf20Sopenharmony_ci cpu_relax(); 2788c2ecf20Sopenharmony_ci } while (lock); 2798c2ecf20Sopenharmony_ci } else { 2808c2ecf20Sopenharmony_ci zhdr = (struct z3fold_header *)(handle & PAGE_MASK); 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci return zhdr; 2848c2ecf20Sopenharmony_ci} 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci/* Returns the z3fold page where a given handle is stored */ 2878c2ecf20Sopenharmony_cistatic inline struct z3fold_header *handle_to_z3fold_header(unsigned long h) 2888c2ecf20Sopenharmony_ci{ 2898c2ecf20Sopenharmony_ci return __get_z3fold_header(h, false); 2908c2ecf20Sopenharmony_ci} 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci/* return locked z3fold page if it's not headless */ 2938c2ecf20Sopenharmony_cistatic inline struct z3fold_header *get_z3fold_header(unsigned long h) 2948c2ecf20Sopenharmony_ci{ 2958c2ecf20Sopenharmony_ci return __get_z3fold_header(h, true); 2968c2ecf20Sopenharmony_ci} 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_cistatic inline void put_z3fold_header(struct z3fold_header *zhdr) 2998c2ecf20Sopenharmony_ci{ 3008c2ecf20Sopenharmony_ci struct page *page = virt_to_page(zhdr); 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci if (!test_bit(PAGE_HEADLESS, &page->private)) 3038c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 3048c2ecf20Sopenharmony_ci} 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_cistatic inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) 3078c2ecf20Sopenharmony_ci{ 3088c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots; 3098c2ecf20Sopenharmony_ci int i; 3108c2ecf20Sopenharmony_ci bool is_free; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci if (handle & (1 << PAGE_HEADLESS)) 3138c2ecf20Sopenharmony_ci return; 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci if (WARN_ON(*(unsigned long *)handle == 0)) 3168c2ecf20Sopenharmony_ci return; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci slots = handle_to_slots(handle); 3198c2ecf20Sopenharmony_ci write_lock(&slots->lock); 3208c2ecf20Sopenharmony_ci *(unsigned long *)handle = 0; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci if (test_bit(HANDLES_NOFREE, &slots->pool)) { 3238c2ecf20Sopenharmony_ci write_unlock(&slots->lock); 3248c2ecf20Sopenharmony_ci return; /* simple case, nothing else to do */ 3258c2ecf20Sopenharmony_ci } 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci if (zhdr->slots != slots) 3288c2ecf20Sopenharmony_ci zhdr->foreign_handles--; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci is_free = true; 3318c2ecf20Sopenharmony_ci for (i = 0; i <= BUDDY_MASK; i++) { 3328c2ecf20Sopenharmony_ci if (slots->slot[i]) { 3338c2ecf20Sopenharmony_ci is_free = false; 3348c2ecf20Sopenharmony_ci break; 3358c2ecf20Sopenharmony_ci } 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci write_unlock(&slots->lock); 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci if (is_free) { 3408c2ecf20Sopenharmony_ci struct z3fold_pool *pool = slots_to_pool(slots); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci if (zhdr->slots == slots) 3438c2ecf20Sopenharmony_ci zhdr->slots = NULL; 3448c2ecf20Sopenharmony_ci kmem_cache_free(pool->c_handle, slots); 3458c2ecf20Sopenharmony_ci } 3468c2ecf20Sopenharmony_ci} 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_cistatic int z3fold_init_fs_context(struct fs_context *fc) 3498c2ecf20Sopenharmony_ci{ 3508c2ecf20Sopenharmony_ci return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM; 3518c2ecf20Sopenharmony_ci} 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_cistatic struct file_system_type z3fold_fs = { 3548c2ecf20Sopenharmony_ci .name = "z3fold", 3558c2ecf20Sopenharmony_ci .init_fs_context = z3fold_init_fs_context, 3568c2ecf20Sopenharmony_ci .kill_sb = kill_anon_super, 3578c2ecf20Sopenharmony_ci}; 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_cistatic struct vfsmount *z3fold_mnt; 3608c2ecf20Sopenharmony_cistatic int z3fold_mount(void) 3618c2ecf20Sopenharmony_ci{ 3628c2ecf20Sopenharmony_ci int ret = 0; 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci z3fold_mnt = kern_mount(&z3fold_fs); 3658c2ecf20Sopenharmony_ci if (IS_ERR(z3fold_mnt)) 3668c2ecf20Sopenharmony_ci ret = PTR_ERR(z3fold_mnt); 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci return ret; 3698c2ecf20Sopenharmony_ci} 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_cistatic void z3fold_unmount(void) 3728c2ecf20Sopenharmony_ci{ 3738c2ecf20Sopenharmony_ci kern_unmount(z3fold_mnt); 3748c2ecf20Sopenharmony_ci} 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_cistatic const struct address_space_operations z3fold_aops; 3778c2ecf20Sopenharmony_cistatic int z3fold_register_migration(struct z3fold_pool *pool) 3788c2ecf20Sopenharmony_ci{ 3798c2ecf20Sopenharmony_ci pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); 3808c2ecf20Sopenharmony_ci if (IS_ERR(pool->inode)) { 3818c2ecf20Sopenharmony_ci pool->inode = NULL; 3828c2ecf20Sopenharmony_ci return 1; 3838c2ecf20Sopenharmony_ci } 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci pool->inode->i_mapping->private_data = pool; 3868c2ecf20Sopenharmony_ci pool->inode->i_mapping->a_ops = &z3fold_aops; 3878c2ecf20Sopenharmony_ci return 0; 3888c2ecf20Sopenharmony_ci} 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_cistatic void z3fold_unregister_migration(struct z3fold_pool *pool) 3918c2ecf20Sopenharmony_ci{ 3928c2ecf20Sopenharmony_ci if (pool->inode) 3938c2ecf20Sopenharmony_ci iput(pool->inode); 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci/* Initializes the z3fold header of a newly allocated z3fold page */ 3978c2ecf20Sopenharmony_cistatic struct z3fold_header *init_z3fold_page(struct page *page, bool headless, 3988c2ecf20Sopenharmony_ci struct z3fold_pool *pool, gfp_t gfp) 3998c2ecf20Sopenharmony_ci{ 4008c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = page_address(page); 4018c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots; 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&page->lru); 4048c2ecf20Sopenharmony_ci clear_bit(PAGE_HEADLESS, &page->private); 4058c2ecf20Sopenharmony_ci clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 4068c2ecf20Sopenharmony_ci clear_bit(NEEDS_COMPACTING, &page->private); 4078c2ecf20Sopenharmony_ci clear_bit(PAGE_STALE, &page->private); 4088c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 4098c2ecf20Sopenharmony_ci if (headless) 4108c2ecf20Sopenharmony_ci return zhdr; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci slots = alloc_slots(pool, gfp); 4138c2ecf20Sopenharmony_ci if (!slots) 4148c2ecf20Sopenharmony_ci return NULL; 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci spin_lock_init(&zhdr->page_lock); 4178c2ecf20Sopenharmony_ci kref_init(&zhdr->refcount); 4188c2ecf20Sopenharmony_ci zhdr->first_chunks = 0; 4198c2ecf20Sopenharmony_ci zhdr->middle_chunks = 0; 4208c2ecf20Sopenharmony_ci zhdr->last_chunks = 0; 4218c2ecf20Sopenharmony_ci zhdr->first_num = 0; 4228c2ecf20Sopenharmony_ci zhdr->start_middle = 0; 4238c2ecf20Sopenharmony_ci zhdr->cpu = -1; 4248c2ecf20Sopenharmony_ci zhdr->foreign_handles = 0; 4258c2ecf20Sopenharmony_ci zhdr->mapped_count = 0; 4268c2ecf20Sopenharmony_ci zhdr->slots = slots; 4278c2ecf20Sopenharmony_ci zhdr->pool = pool; 4288c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&zhdr->buddy); 4298c2ecf20Sopenharmony_ci INIT_WORK(&zhdr->work, compact_page_work); 4308c2ecf20Sopenharmony_ci return zhdr; 4318c2ecf20Sopenharmony_ci} 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_ci/* Resets the struct page fields and frees the page */ 4348c2ecf20Sopenharmony_cistatic void free_z3fold_page(struct page *page, bool headless) 4358c2ecf20Sopenharmony_ci{ 4368c2ecf20Sopenharmony_ci if (!headless) { 4378c2ecf20Sopenharmony_ci lock_page(page); 4388c2ecf20Sopenharmony_ci __ClearPageMovable(page); 4398c2ecf20Sopenharmony_ci unlock_page(page); 4408c2ecf20Sopenharmony_ci } 4418c2ecf20Sopenharmony_ci ClearPagePrivate(page); 4428c2ecf20Sopenharmony_ci __free_page(page); 4438c2ecf20Sopenharmony_ci} 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci/* Helper function to build the index */ 4468c2ecf20Sopenharmony_cistatic inline int __idx(struct z3fold_header *zhdr, enum buddy bud) 4478c2ecf20Sopenharmony_ci{ 4488c2ecf20Sopenharmony_ci return (bud + zhdr->first_num) & BUDDY_MASK; 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci/* 4528c2ecf20Sopenharmony_ci * Encodes the handle of a particular buddy within a z3fold page 4538c2ecf20Sopenharmony_ci * Pool lock should be held as this function accesses first_num 4548c2ecf20Sopenharmony_ci */ 4558c2ecf20Sopenharmony_cistatic unsigned long __encode_handle(struct z3fold_header *zhdr, 4568c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots, 4578c2ecf20Sopenharmony_ci enum buddy bud) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci unsigned long h = (unsigned long)zhdr; 4608c2ecf20Sopenharmony_ci int idx = 0; 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci /* 4638c2ecf20Sopenharmony_ci * For a headless page, its handle is its pointer with the extra 4648c2ecf20Sopenharmony_ci * PAGE_HEADLESS bit set 4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_ci if (bud == HEADLESS) 4678c2ecf20Sopenharmony_ci return h | (1 << PAGE_HEADLESS); 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci /* otherwise, return pointer to encoded handle */ 4708c2ecf20Sopenharmony_ci idx = __idx(zhdr, bud); 4718c2ecf20Sopenharmony_ci h += idx; 4728c2ecf20Sopenharmony_ci if (bud == LAST) 4738c2ecf20Sopenharmony_ci h |= (zhdr->last_chunks << BUDDY_SHIFT); 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci write_lock(&slots->lock); 4768c2ecf20Sopenharmony_ci slots->slot[idx] = h; 4778c2ecf20Sopenharmony_ci write_unlock(&slots->lock); 4788c2ecf20Sopenharmony_ci return (unsigned long)&slots->slot[idx]; 4798c2ecf20Sopenharmony_ci} 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_cistatic unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 4828c2ecf20Sopenharmony_ci{ 4838c2ecf20Sopenharmony_ci return __encode_handle(zhdr, zhdr->slots, bud); 4848c2ecf20Sopenharmony_ci} 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_ci/* only for LAST bud, returns zero otherwise */ 4878c2ecf20Sopenharmony_cistatic unsigned short handle_to_chunks(unsigned long handle) 4888c2ecf20Sopenharmony_ci{ 4898c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots = handle_to_slots(handle); 4908c2ecf20Sopenharmony_ci unsigned long addr; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci read_lock(&slots->lock); 4938c2ecf20Sopenharmony_ci addr = *(unsigned long *)handle; 4948c2ecf20Sopenharmony_ci read_unlock(&slots->lock); 4958c2ecf20Sopenharmony_ci return (addr & ~PAGE_MASK) >> BUDDY_SHIFT; 4968c2ecf20Sopenharmony_ci} 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci/* 4998c2ecf20Sopenharmony_ci * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 5008c2ecf20Sopenharmony_ci * but that doesn't matter. because the masking will result in the 5018c2ecf20Sopenharmony_ci * correct buddy number. 5028c2ecf20Sopenharmony_ci */ 5038c2ecf20Sopenharmony_cistatic enum buddy handle_to_buddy(unsigned long handle) 5048c2ecf20Sopenharmony_ci{ 5058c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 5068c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots = handle_to_slots(handle); 5078c2ecf20Sopenharmony_ci unsigned long addr; 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci read_lock(&slots->lock); 5108c2ecf20Sopenharmony_ci WARN_ON(handle & (1 << PAGE_HEADLESS)); 5118c2ecf20Sopenharmony_ci addr = *(unsigned long *)handle; 5128c2ecf20Sopenharmony_ci read_unlock(&slots->lock); 5138c2ecf20Sopenharmony_ci zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 5148c2ecf20Sopenharmony_ci return (addr - zhdr->first_num) & BUDDY_MASK; 5158c2ecf20Sopenharmony_ci} 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_cistatic inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) 5188c2ecf20Sopenharmony_ci{ 5198c2ecf20Sopenharmony_ci return zhdr->pool; 5208c2ecf20Sopenharmony_ci} 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_cistatic void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 5238c2ecf20Sopenharmony_ci{ 5248c2ecf20Sopenharmony_ci struct page *page = virt_to_page(zhdr); 5258c2ecf20Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&zhdr->buddy)); 5288c2ecf20Sopenharmony_ci set_bit(PAGE_STALE, &page->private); 5298c2ecf20Sopenharmony_ci clear_bit(NEEDS_COMPACTING, &page->private); 5308c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 5318c2ecf20Sopenharmony_ci if (!list_empty(&page->lru)) 5328c2ecf20Sopenharmony_ci list_del_init(&page->lru); 5338c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci if (locked) 5368c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci spin_lock(&pool->stale_lock); 5398c2ecf20Sopenharmony_ci list_add(&zhdr->buddy, &pool->stale); 5408c2ecf20Sopenharmony_ci queue_work(pool->release_wq, &pool->work); 5418c2ecf20Sopenharmony_ci spin_unlock(&pool->stale_lock); 5428c2ecf20Sopenharmony_ci} 5438c2ecf20Sopenharmony_ci 5448c2ecf20Sopenharmony_cistatic void __attribute__((__unused__)) 5458c2ecf20Sopenharmony_ci release_z3fold_page(struct kref *ref) 5468c2ecf20Sopenharmony_ci{ 5478c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 5488c2ecf20Sopenharmony_ci refcount); 5498c2ecf20Sopenharmony_ci __release_z3fold_page(zhdr, false); 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic void release_z3fold_page_locked(struct kref *ref) 5538c2ecf20Sopenharmony_ci{ 5548c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 5558c2ecf20Sopenharmony_ci refcount); 5568c2ecf20Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 5578c2ecf20Sopenharmony_ci __release_z3fold_page(zhdr, true); 5588c2ecf20Sopenharmony_ci} 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_cistatic void release_z3fold_page_locked_list(struct kref *ref) 5618c2ecf20Sopenharmony_ci{ 5628c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 5638c2ecf20Sopenharmony_ci refcount); 5648c2ecf20Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 5678c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 5688c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 5718c2ecf20Sopenharmony_ci __release_z3fold_page(zhdr, true); 5728c2ecf20Sopenharmony_ci} 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_cistatic void free_pages_work(struct work_struct *w) 5758c2ecf20Sopenharmony_ci{ 5768c2ecf20Sopenharmony_ci struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci spin_lock(&pool->stale_lock); 5798c2ecf20Sopenharmony_ci while (!list_empty(&pool->stale)) { 5808c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = list_first_entry(&pool->stale, 5818c2ecf20Sopenharmony_ci struct z3fold_header, buddy); 5828c2ecf20Sopenharmony_ci struct page *page = virt_to_page(zhdr); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci list_del(&zhdr->buddy); 5858c2ecf20Sopenharmony_ci if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 5868c2ecf20Sopenharmony_ci continue; 5878c2ecf20Sopenharmony_ci spin_unlock(&pool->stale_lock); 5888c2ecf20Sopenharmony_ci cancel_work_sync(&zhdr->work); 5898c2ecf20Sopenharmony_ci free_z3fold_page(page, false); 5908c2ecf20Sopenharmony_ci cond_resched(); 5918c2ecf20Sopenharmony_ci spin_lock(&pool->stale_lock); 5928c2ecf20Sopenharmony_ci } 5938c2ecf20Sopenharmony_ci spin_unlock(&pool->stale_lock); 5948c2ecf20Sopenharmony_ci} 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci/* 5978c2ecf20Sopenharmony_ci * Returns the number of free chunks in a z3fold page. 5988c2ecf20Sopenharmony_ci * NB: can't be used with HEADLESS pages. 5998c2ecf20Sopenharmony_ci */ 6008c2ecf20Sopenharmony_cistatic int num_free_chunks(struct z3fold_header *zhdr) 6018c2ecf20Sopenharmony_ci{ 6028c2ecf20Sopenharmony_ci int nfree; 6038c2ecf20Sopenharmony_ci /* 6048c2ecf20Sopenharmony_ci * If there is a middle object, pick up the bigger free space 6058c2ecf20Sopenharmony_ci * either before or after it. Otherwise just subtract the number 6068c2ecf20Sopenharmony_ci * of chunks occupied by the first and the last objects. 6078c2ecf20Sopenharmony_ci */ 6088c2ecf20Sopenharmony_ci if (zhdr->middle_chunks != 0) { 6098c2ecf20Sopenharmony_ci int nfree_before = zhdr->first_chunks ? 6108c2ecf20Sopenharmony_ci 0 : zhdr->start_middle - ZHDR_CHUNKS; 6118c2ecf20Sopenharmony_ci int nfree_after = zhdr->last_chunks ? 6128c2ecf20Sopenharmony_ci 0 : TOTAL_CHUNKS - 6138c2ecf20Sopenharmony_ci (zhdr->start_middle + zhdr->middle_chunks); 6148c2ecf20Sopenharmony_ci nfree = max(nfree_before, nfree_after); 6158c2ecf20Sopenharmony_ci } else 6168c2ecf20Sopenharmony_ci nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 6178c2ecf20Sopenharmony_ci return nfree; 6188c2ecf20Sopenharmony_ci} 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci/* Add to the appropriate unbuddied list */ 6218c2ecf20Sopenharmony_cistatic inline void add_to_unbuddied(struct z3fold_pool *pool, 6228c2ecf20Sopenharmony_ci struct z3fold_header *zhdr) 6238c2ecf20Sopenharmony_ci{ 6248c2ecf20Sopenharmony_ci if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || 6258c2ecf20Sopenharmony_ci zhdr->middle_chunks == 0) { 6268c2ecf20Sopenharmony_ci struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied); 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci int freechunks = num_free_chunks(zhdr); 6298c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 6308c2ecf20Sopenharmony_ci list_add(&zhdr->buddy, &unbuddied[freechunks]); 6318c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 6328c2ecf20Sopenharmony_ci zhdr->cpu = smp_processor_id(); 6338c2ecf20Sopenharmony_ci put_cpu_ptr(pool->unbuddied); 6348c2ecf20Sopenharmony_ci } 6358c2ecf20Sopenharmony_ci} 6368c2ecf20Sopenharmony_ci 6378c2ecf20Sopenharmony_cistatic inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks) 6388c2ecf20Sopenharmony_ci{ 6398c2ecf20Sopenharmony_ci enum buddy bud = HEADLESS; 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci if (zhdr->middle_chunks) { 6428c2ecf20Sopenharmony_ci if (!zhdr->first_chunks && 6438c2ecf20Sopenharmony_ci chunks <= zhdr->start_middle - ZHDR_CHUNKS) 6448c2ecf20Sopenharmony_ci bud = FIRST; 6458c2ecf20Sopenharmony_ci else if (!zhdr->last_chunks) 6468c2ecf20Sopenharmony_ci bud = LAST; 6478c2ecf20Sopenharmony_ci } else { 6488c2ecf20Sopenharmony_ci if (!zhdr->first_chunks) 6498c2ecf20Sopenharmony_ci bud = FIRST; 6508c2ecf20Sopenharmony_ci else if (!zhdr->last_chunks) 6518c2ecf20Sopenharmony_ci bud = LAST; 6528c2ecf20Sopenharmony_ci else 6538c2ecf20Sopenharmony_ci bud = MIDDLE; 6548c2ecf20Sopenharmony_ci } 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci return bud; 6578c2ecf20Sopenharmony_ci} 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_cistatic inline void *mchunk_memmove(struct z3fold_header *zhdr, 6608c2ecf20Sopenharmony_ci unsigned short dst_chunk) 6618c2ecf20Sopenharmony_ci{ 6628c2ecf20Sopenharmony_ci void *beg = zhdr; 6638c2ecf20Sopenharmony_ci return memmove(beg + (dst_chunk << CHUNK_SHIFT), 6648c2ecf20Sopenharmony_ci beg + (zhdr->start_middle << CHUNK_SHIFT), 6658c2ecf20Sopenharmony_ci zhdr->middle_chunks << CHUNK_SHIFT); 6668c2ecf20Sopenharmony_ci} 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_cistatic inline bool buddy_single(struct z3fold_header *zhdr) 6698c2ecf20Sopenharmony_ci{ 6708c2ecf20Sopenharmony_ci return !((zhdr->first_chunks && zhdr->middle_chunks) || 6718c2ecf20Sopenharmony_ci (zhdr->first_chunks && zhdr->last_chunks) || 6728c2ecf20Sopenharmony_ci (zhdr->middle_chunks && zhdr->last_chunks)); 6738c2ecf20Sopenharmony_ci} 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_cistatic struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr) 6768c2ecf20Sopenharmony_ci{ 6778c2ecf20Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 6788c2ecf20Sopenharmony_ci void *p = zhdr; 6798c2ecf20Sopenharmony_ci unsigned long old_handle = 0; 6808c2ecf20Sopenharmony_ci size_t sz = 0; 6818c2ecf20Sopenharmony_ci struct z3fold_header *new_zhdr = NULL; 6828c2ecf20Sopenharmony_ci int first_idx = __idx(zhdr, FIRST); 6838c2ecf20Sopenharmony_ci int middle_idx = __idx(zhdr, MIDDLE); 6848c2ecf20Sopenharmony_ci int last_idx = __idx(zhdr, LAST); 6858c2ecf20Sopenharmony_ci unsigned short *moved_chunks = NULL; 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci /* 6888c2ecf20Sopenharmony_ci * No need to protect slots here -- all the slots are "local" and 6898c2ecf20Sopenharmony_ci * the page lock is already taken 6908c2ecf20Sopenharmony_ci */ 6918c2ecf20Sopenharmony_ci if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) { 6928c2ecf20Sopenharmony_ci p += ZHDR_SIZE_ALIGNED; 6938c2ecf20Sopenharmony_ci sz = zhdr->first_chunks << CHUNK_SHIFT; 6948c2ecf20Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[first_idx]; 6958c2ecf20Sopenharmony_ci moved_chunks = &zhdr->first_chunks; 6968c2ecf20Sopenharmony_ci } else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) { 6978c2ecf20Sopenharmony_ci p += zhdr->start_middle << CHUNK_SHIFT; 6988c2ecf20Sopenharmony_ci sz = zhdr->middle_chunks << CHUNK_SHIFT; 6998c2ecf20Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[middle_idx]; 7008c2ecf20Sopenharmony_ci moved_chunks = &zhdr->middle_chunks; 7018c2ecf20Sopenharmony_ci } else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) { 7028c2ecf20Sopenharmony_ci p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); 7038c2ecf20Sopenharmony_ci sz = zhdr->last_chunks << CHUNK_SHIFT; 7048c2ecf20Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[last_idx]; 7058c2ecf20Sopenharmony_ci moved_chunks = &zhdr->last_chunks; 7068c2ecf20Sopenharmony_ci } 7078c2ecf20Sopenharmony_ci 7088c2ecf20Sopenharmony_ci if (sz > 0) { 7098c2ecf20Sopenharmony_ci enum buddy new_bud = HEADLESS; 7108c2ecf20Sopenharmony_ci short chunks = size_to_chunks(sz); 7118c2ecf20Sopenharmony_ci void *q; 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci new_zhdr = __z3fold_alloc(pool, sz, false); 7148c2ecf20Sopenharmony_ci if (!new_zhdr) 7158c2ecf20Sopenharmony_ci return NULL; 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci if (WARN_ON(new_zhdr == zhdr)) 7188c2ecf20Sopenharmony_ci goto out_fail; 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_ci new_bud = get_free_buddy(new_zhdr, chunks); 7218c2ecf20Sopenharmony_ci q = new_zhdr; 7228c2ecf20Sopenharmony_ci switch (new_bud) { 7238c2ecf20Sopenharmony_ci case FIRST: 7248c2ecf20Sopenharmony_ci new_zhdr->first_chunks = chunks; 7258c2ecf20Sopenharmony_ci q += ZHDR_SIZE_ALIGNED; 7268c2ecf20Sopenharmony_ci break; 7278c2ecf20Sopenharmony_ci case MIDDLE: 7288c2ecf20Sopenharmony_ci new_zhdr->middle_chunks = chunks; 7298c2ecf20Sopenharmony_ci new_zhdr->start_middle = 7308c2ecf20Sopenharmony_ci new_zhdr->first_chunks + ZHDR_CHUNKS; 7318c2ecf20Sopenharmony_ci q += new_zhdr->start_middle << CHUNK_SHIFT; 7328c2ecf20Sopenharmony_ci break; 7338c2ecf20Sopenharmony_ci case LAST: 7348c2ecf20Sopenharmony_ci new_zhdr->last_chunks = chunks; 7358c2ecf20Sopenharmony_ci q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT); 7368c2ecf20Sopenharmony_ci break; 7378c2ecf20Sopenharmony_ci default: 7388c2ecf20Sopenharmony_ci goto out_fail; 7398c2ecf20Sopenharmony_ci } 7408c2ecf20Sopenharmony_ci new_zhdr->foreign_handles++; 7418c2ecf20Sopenharmony_ci memcpy(q, p, sz); 7428c2ecf20Sopenharmony_ci write_lock(&zhdr->slots->lock); 7438c2ecf20Sopenharmony_ci *(unsigned long *)old_handle = (unsigned long)new_zhdr + 7448c2ecf20Sopenharmony_ci __idx(new_zhdr, new_bud); 7458c2ecf20Sopenharmony_ci if (new_bud == LAST) 7468c2ecf20Sopenharmony_ci *(unsigned long *)old_handle |= 7478c2ecf20Sopenharmony_ci (new_zhdr->last_chunks << BUDDY_SHIFT); 7488c2ecf20Sopenharmony_ci write_unlock(&zhdr->slots->lock); 7498c2ecf20Sopenharmony_ci add_to_unbuddied(pool, new_zhdr); 7508c2ecf20Sopenharmony_ci z3fold_page_unlock(new_zhdr); 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci *moved_chunks = 0; 7538c2ecf20Sopenharmony_ci } 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci return new_zhdr; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ciout_fail: 7588c2ecf20Sopenharmony_ci if (new_zhdr) { 7598c2ecf20Sopenharmony_ci if (kref_put(&new_zhdr->refcount, release_z3fold_page_locked)) 7608c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 7618c2ecf20Sopenharmony_ci else { 7628c2ecf20Sopenharmony_ci add_to_unbuddied(pool, new_zhdr); 7638c2ecf20Sopenharmony_ci z3fold_page_unlock(new_zhdr); 7648c2ecf20Sopenharmony_ci } 7658c2ecf20Sopenharmony_ci } 7668c2ecf20Sopenharmony_ci return NULL; 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci} 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_ci#define BIG_CHUNK_GAP 3 7718c2ecf20Sopenharmony_ci/* Has to be called with lock held */ 7728c2ecf20Sopenharmony_cistatic int z3fold_compact_page(struct z3fold_header *zhdr) 7738c2ecf20Sopenharmony_ci{ 7748c2ecf20Sopenharmony_ci struct page *page = virt_to_page(zhdr); 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 7778c2ecf20Sopenharmony_ci return 0; /* can't move middle chunk, it's used */ 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci if (unlikely(PageIsolated(page))) 7808c2ecf20Sopenharmony_ci return 0; 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci if (zhdr->middle_chunks == 0) 7838c2ecf20Sopenharmony_ci return 0; /* nothing to compact */ 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 7868c2ecf20Sopenharmony_ci /* move to the beginning */ 7878c2ecf20Sopenharmony_ci mchunk_memmove(zhdr, ZHDR_CHUNKS); 7888c2ecf20Sopenharmony_ci zhdr->first_chunks = zhdr->middle_chunks; 7898c2ecf20Sopenharmony_ci zhdr->middle_chunks = 0; 7908c2ecf20Sopenharmony_ci zhdr->start_middle = 0; 7918c2ecf20Sopenharmony_ci zhdr->first_num++; 7928c2ecf20Sopenharmony_ci return 1; 7938c2ecf20Sopenharmony_ci } 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci /* 7968c2ecf20Sopenharmony_ci * moving data is expensive, so let's only do that if 7978c2ecf20Sopenharmony_ci * there's substantial gain (at least BIG_CHUNK_GAP chunks) 7988c2ecf20Sopenharmony_ci */ 7998c2ecf20Sopenharmony_ci if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 8008c2ecf20Sopenharmony_ci zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 8018c2ecf20Sopenharmony_ci BIG_CHUNK_GAP) { 8028c2ecf20Sopenharmony_ci mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 8038c2ecf20Sopenharmony_ci zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 8048c2ecf20Sopenharmony_ci return 1; 8058c2ecf20Sopenharmony_ci } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 8068c2ecf20Sopenharmony_ci TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 8078c2ecf20Sopenharmony_ci + zhdr->middle_chunks) >= 8088c2ecf20Sopenharmony_ci BIG_CHUNK_GAP) { 8098c2ecf20Sopenharmony_ci unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 8108c2ecf20Sopenharmony_ci zhdr->middle_chunks; 8118c2ecf20Sopenharmony_ci mchunk_memmove(zhdr, new_start); 8128c2ecf20Sopenharmony_ci zhdr->start_middle = new_start; 8138c2ecf20Sopenharmony_ci return 1; 8148c2ecf20Sopenharmony_ci } 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci return 0; 8178c2ecf20Sopenharmony_ci} 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_cistatic void do_compact_page(struct z3fold_header *zhdr, bool locked) 8208c2ecf20Sopenharmony_ci{ 8218c2ecf20Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 8228c2ecf20Sopenharmony_ci struct page *page; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 8258c2ecf20Sopenharmony_ci if (locked) 8268c2ecf20Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 8278c2ecf20Sopenharmony_ci else 8288c2ecf20Sopenharmony_ci z3fold_page_lock(zhdr); 8298c2ecf20Sopenharmony_ci if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 8308c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 8318c2ecf20Sopenharmony_ci return; 8328c2ecf20Sopenharmony_ci } 8338c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 8348c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 8358c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { 8388c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 8398c2ecf20Sopenharmony_ci return; 8408c2ecf20Sopenharmony_ci } 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci if (test_bit(PAGE_STALE, &page->private) || 8438c2ecf20Sopenharmony_ci test_and_set_bit(PAGE_CLAIMED, &page->private)) { 8448c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 8458c2ecf20Sopenharmony_ci return; 8468c2ecf20Sopenharmony_ci } 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (!zhdr->foreign_handles && buddy_single(zhdr) && 8498c2ecf20Sopenharmony_ci zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) { 8508c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) 8518c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 8528c2ecf20Sopenharmony_ci else { 8538c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 8548c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 8558c2ecf20Sopenharmony_ci } 8568c2ecf20Sopenharmony_ci return; 8578c2ecf20Sopenharmony_ci } 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ci z3fold_compact_page(zhdr); 8608c2ecf20Sopenharmony_ci add_to_unbuddied(pool, zhdr); 8618c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 8628c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 8638c2ecf20Sopenharmony_ci} 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_cistatic void compact_page_work(struct work_struct *w) 8668c2ecf20Sopenharmony_ci{ 8678c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 8688c2ecf20Sopenharmony_ci work); 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ci do_compact_page(zhdr, false); 8718c2ecf20Sopenharmony_ci} 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci/* returns _locked_ z3fold page header or NULL */ 8748c2ecf20Sopenharmony_cistatic inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool, 8758c2ecf20Sopenharmony_ci size_t size, bool can_sleep) 8768c2ecf20Sopenharmony_ci{ 8778c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = NULL; 8788c2ecf20Sopenharmony_ci struct page *page; 8798c2ecf20Sopenharmony_ci struct list_head *unbuddied; 8808c2ecf20Sopenharmony_ci int chunks = size_to_chunks(size), i; 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_cilookup: 8838c2ecf20Sopenharmony_ci /* First, try to find an unbuddied z3fold page. */ 8848c2ecf20Sopenharmony_ci unbuddied = get_cpu_ptr(pool->unbuddied); 8858c2ecf20Sopenharmony_ci for_each_unbuddied_list(i, chunks) { 8868c2ecf20Sopenharmony_ci struct list_head *l = &unbuddied[i]; 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci zhdr = list_first_entry_or_null(READ_ONCE(l), 8898c2ecf20Sopenharmony_ci struct z3fold_header, buddy); 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_ci if (!zhdr) 8928c2ecf20Sopenharmony_ci continue; 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci /* Re-check under lock. */ 8958c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 8968c2ecf20Sopenharmony_ci l = &unbuddied[i]; 8978c2ecf20Sopenharmony_ci if (unlikely(zhdr != list_first_entry(READ_ONCE(l), 8988c2ecf20Sopenharmony_ci struct z3fold_header, buddy)) || 8998c2ecf20Sopenharmony_ci !z3fold_page_trylock(zhdr)) { 9008c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 9018c2ecf20Sopenharmony_ci zhdr = NULL; 9028c2ecf20Sopenharmony_ci put_cpu_ptr(pool->unbuddied); 9038c2ecf20Sopenharmony_ci if (can_sleep) 9048c2ecf20Sopenharmony_ci cond_resched(); 9058c2ecf20Sopenharmony_ci goto lookup; 9068c2ecf20Sopenharmony_ci } 9078c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 9088c2ecf20Sopenharmony_ci zhdr->cpu = -1; 9098c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 9128c2ecf20Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 9138c2ecf20Sopenharmony_ci test_bit(PAGE_CLAIMED, &page->private)) { 9148c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 9158c2ecf20Sopenharmony_ci zhdr = NULL; 9168c2ecf20Sopenharmony_ci put_cpu_ptr(pool->unbuddied); 9178c2ecf20Sopenharmony_ci if (can_sleep) 9188c2ecf20Sopenharmony_ci cond_resched(); 9198c2ecf20Sopenharmony_ci goto lookup; 9208c2ecf20Sopenharmony_ci } 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci /* 9238c2ecf20Sopenharmony_ci * this page could not be removed from its unbuddied 9248c2ecf20Sopenharmony_ci * list while pool lock was held, and then we've taken 9258c2ecf20Sopenharmony_ci * page lock so kref_put could not be called before 9268c2ecf20Sopenharmony_ci * we got here, so it's safe to just call kref_get() 9278c2ecf20Sopenharmony_ci */ 9288c2ecf20Sopenharmony_ci kref_get(&zhdr->refcount); 9298c2ecf20Sopenharmony_ci break; 9308c2ecf20Sopenharmony_ci } 9318c2ecf20Sopenharmony_ci put_cpu_ptr(pool->unbuddied); 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci if (!zhdr) { 9348c2ecf20Sopenharmony_ci int cpu; 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ci /* look for _exact_ match on other cpus' lists */ 9378c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 9388c2ecf20Sopenharmony_ci struct list_head *l; 9398c2ecf20Sopenharmony_ci 9408c2ecf20Sopenharmony_ci unbuddied = per_cpu_ptr(pool->unbuddied, cpu); 9418c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 9428c2ecf20Sopenharmony_ci l = &unbuddied[chunks]; 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci zhdr = list_first_entry_or_null(READ_ONCE(l), 9458c2ecf20Sopenharmony_ci struct z3fold_header, buddy); 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci if (!zhdr || !z3fold_page_trylock(zhdr)) { 9488c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 9498c2ecf20Sopenharmony_ci zhdr = NULL; 9508c2ecf20Sopenharmony_ci continue; 9518c2ecf20Sopenharmony_ci } 9528c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 9538c2ecf20Sopenharmony_ci zhdr->cpu = -1; 9548c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 9578c2ecf20Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 9588c2ecf20Sopenharmony_ci test_bit(PAGE_CLAIMED, &page->private)) { 9598c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 9608c2ecf20Sopenharmony_ci zhdr = NULL; 9618c2ecf20Sopenharmony_ci if (can_sleep) 9628c2ecf20Sopenharmony_ci cond_resched(); 9638c2ecf20Sopenharmony_ci continue; 9648c2ecf20Sopenharmony_ci } 9658c2ecf20Sopenharmony_ci kref_get(&zhdr->refcount); 9668c2ecf20Sopenharmony_ci break; 9678c2ecf20Sopenharmony_ci } 9688c2ecf20Sopenharmony_ci } 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci if (zhdr && !zhdr->slots) 9718c2ecf20Sopenharmony_ci zhdr->slots = alloc_slots(pool, 9728c2ecf20Sopenharmony_ci can_sleep ? GFP_NOIO : GFP_ATOMIC); 9738c2ecf20Sopenharmony_ci return zhdr; 9748c2ecf20Sopenharmony_ci} 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_ci/* 9778c2ecf20Sopenharmony_ci * API Functions 9788c2ecf20Sopenharmony_ci */ 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci/** 9818c2ecf20Sopenharmony_ci * z3fold_create_pool() - create a new z3fold pool 9828c2ecf20Sopenharmony_ci * @name: pool name 9838c2ecf20Sopenharmony_ci * @gfp: gfp flags when allocating the z3fold pool structure 9848c2ecf20Sopenharmony_ci * @ops: user-defined operations for the z3fold pool 9858c2ecf20Sopenharmony_ci * 9868c2ecf20Sopenharmony_ci * Return: pointer to the new z3fold pool or NULL if the metadata allocation 9878c2ecf20Sopenharmony_ci * failed. 9888c2ecf20Sopenharmony_ci */ 9898c2ecf20Sopenharmony_cistatic struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, 9908c2ecf20Sopenharmony_ci const struct z3fold_ops *ops) 9918c2ecf20Sopenharmony_ci{ 9928c2ecf20Sopenharmony_ci struct z3fold_pool *pool = NULL; 9938c2ecf20Sopenharmony_ci int i, cpu; 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci pool = kzalloc(sizeof(struct z3fold_pool), gfp); 9968c2ecf20Sopenharmony_ci if (!pool) 9978c2ecf20Sopenharmony_ci goto out; 9988c2ecf20Sopenharmony_ci pool->c_handle = kmem_cache_create("z3fold_handle", 9998c2ecf20Sopenharmony_ci sizeof(struct z3fold_buddy_slots), 10008c2ecf20Sopenharmony_ci SLOTS_ALIGN, 0, NULL); 10018c2ecf20Sopenharmony_ci if (!pool->c_handle) 10028c2ecf20Sopenharmony_ci goto out_c; 10038c2ecf20Sopenharmony_ci spin_lock_init(&pool->lock); 10048c2ecf20Sopenharmony_ci spin_lock_init(&pool->stale_lock); 10058c2ecf20Sopenharmony_ci pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); 10068c2ecf20Sopenharmony_ci if (!pool->unbuddied) 10078c2ecf20Sopenharmony_ci goto out_pool; 10088c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 10098c2ecf20Sopenharmony_ci struct list_head *unbuddied = 10108c2ecf20Sopenharmony_ci per_cpu_ptr(pool->unbuddied, cpu); 10118c2ecf20Sopenharmony_ci for_each_unbuddied_list(i, 0) 10128c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&unbuddied[i]); 10138c2ecf20Sopenharmony_ci } 10148c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&pool->lru); 10158c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&pool->stale); 10168c2ecf20Sopenharmony_ci atomic64_set(&pool->pages_nr, 0); 10178c2ecf20Sopenharmony_ci pool->name = name; 10188c2ecf20Sopenharmony_ci pool->compact_wq = create_singlethread_workqueue(pool->name); 10198c2ecf20Sopenharmony_ci if (!pool->compact_wq) 10208c2ecf20Sopenharmony_ci goto out_unbuddied; 10218c2ecf20Sopenharmony_ci pool->release_wq = create_singlethread_workqueue(pool->name); 10228c2ecf20Sopenharmony_ci if (!pool->release_wq) 10238c2ecf20Sopenharmony_ci goto out_wq; 10248c2ecf20Sopenharmony_ci if (z3fold_register_migration(pool)) 10258c2ecf20Sopenharmony_ci goto out_rwq; 10268c2ecf20Sopenharmony_ci INIT_WORK(&pool->work, free_pages_work); 10278c2ecf20Sopenharmony_ci pool->ops = ops; 10288c2ecf20Sopenharmony_ci return pool; 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_ciout_rwq: 10318c2ecf20Sopenharmony_ci destroy_workqueue(pool->release_wq); 10328c2ecf20Sopenharmony_ciout_wq: 10338c2ecf20Sopenharmony_ci destroy_workqueue(pool->compact_wq); 10348c2ecf20Sopenharmony_ciout_unbuddied: 10358c2ecf20Sopenharmony_ci free_percpu(pool->unbuddied); 10368c2ecf20Sopenharmony_ciout_pool: 10378c2ecf20Sopenharmony_ci kmem_cache_destroy(pool->c_handle); 10388c2ecf20Sopenharmony_ciout_c: 10398c2ecf20Sopenharmony_ci kfree(pool); 10408c2ecf20Sopenharmony_ciout: 10418c2ecf20Sopenharmony_ci return NULL; 10428c2ecf20Sopenharmony_ci} 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci/** 10458c2ecf20Sopenharmony_ci * z3fold_destroy_pool() - destroys an existing z3fold pool 10468c2ecf20Sopenharmony_ci * @pool: the z3fold pool to be destroyed 10478c2ecf20Sopenharmony_ci * 10488c2ecf20Sopenharmony_ci * The pool should be emptied before this function is called. 10498c2ecf20Sopenharmony_ci */ 10508c2ecf20Sopenharmony_cistatic void z3fold_destroy_pool(struct z3fold_pool *pool) 10518c2ecf20Sopenharmony_ci{ 10528c2ecf20Sopenharmony_ci kmem_cache_destroy(pool->c_handle); 10538c2ecf20Sopenharmony_ci 10548c2ecf20Sopenharmony_ci /* 10558c2ecf20Sopenharmony_ci * We need to destroy pool->compact_wq before pool->release_wq, 10568c2ecf20Sopenharmony_ci * as any pending work on pool->compact_wq will call 10578c2ecf20Sopenharmony_ci * queue_work(pool->release_wq, &pool->work). 10588c2ecf20Sopenharmony_ci * 10598c2ecf20Sopenharmony_ci * There are still outstanding pages until both workqueues are drained, 10608c2ecf20Sopenharmony_ci * so we cannot unregister migration until then. 10618c2ecf20Sopenharmony_ci */ 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci destroy_workqueue(pool->compact_wq); 10648c2ecf20Sopenharmony_ci destroy_workqueue(pool->release_wq); 10658c2ecf20Sopenharmony_ci z3fold_unregister_migration(pool); 10668c2ecf20Sopenharmony_ci free_percpu(pool->unbuddied); 10678c2ecf20Sopenharmony_ci kfree(pool); 10688c2ecf20Sopenharmony_ci} 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ci/** 10718c2ecf20Sopenharmony_ci * z3fold_alloc() - allocates a region of a given size 10728c2ecf20Sopenharmony_ci * @pool: z3fold pool from which to allocate 10738c2ecf20Sopenharmony_ci * @size: size in bytes of the desired allocation 10748c2ecf20Sopenharmony_ci * @gfp: gfp flags used if the pool needs to grow 10758c2ecf20Sopenharmony_ci * @handle: handle of the new allocation 10768c2ecf20Sopenharmony_ci * 10778c2ecf20Sopenharmony_ci * This function will attempt to find a free region in the pool large enough to 10788c2ecf20Sopenharmony_ci * satisfy the allocation request. A search of the unbuddied lists is 10798c2ecf20Sopenharmony_ci * performed first. If no suitable free region is found, then a new page is 10808c2ecf20Sopenharmony_ci * allocated and added to the pool to satisfy the request. 10818c2ecf20Sopenharmony_ci * 10828c2ecf20Sopenharmony_ci * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used 10838c2ecf20Sopenharmony_ci * as z3fold pool pages. 10848c2ecf20Sopenharmony_ci * 10858c2ecf20Sopenharmony_ci * Return: 0 if success and handle is set, otherwise -EINVAL if the size or 10868c2ecf20Sopenharmony_ci * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 10878c2ecf20Sopenharmony_ci * a new page. 10888c2ecf20Sopenharmony_ci */ 10898c2ecf20Sopenharmony_cistatic int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, 10908c2ecf20Sopenharmony_ci unsigned long *handle) 10918c2ecf20Sopenharmony_ci{ 10928c2ecf20Sopenharmony_ci int chunks = size_to_chunks(size); 10938c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = NULL; 10948c2ecf20Sopenharmony_ci struct page *page = NULL; 10958c2ecf20Sopenharmony_ci enum buddy bud; 10968c2ecf20Sopenharmony_ci bool can_sleep = gfpflags_allow_blocking(gfp); 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci if (!size) 10998c2ecf20Sopenharmony_ci return -EINVAL; 11008c2ecf20Sopenharmony_ci 11018c2ecf20Sopenharmony_ci if (size > PAGE_SIZE) 11028c2ecf20Sopenharmony_ci return -ENOSPC; 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ci if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) 11058c2ecf20Sopenharmony_ci bud = HEADLESS; 11068c2ecf20Sopenharmony_ci else { 11078c2ecf20Sopenharmony_ciretry: 11088c2ecf20Sopenharmony_ci zhdr = __z3fold_alloc(pool, size, can_sleep); 11098c2ecf20Sopenharmony_ci if (zhdr) { 11108c2ecf20Sopenharmony_ci bud = get_free_buddy(zhdr, chunks); 11118c2ecf20Sopenharmony_ci if (bud == HEADLESS) { 11128c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, 11138c2ecf20Sopenharmony_ci release_z3fold_page_locked)) 11148c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 11158c2ecf20Sopenharmony_ci else 11168c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 11178c2ecf20Sopenharmony_ci pr_err("No free chunks in unbuddied\n"); 11188c2ecf20Sopenharmony_ci WARN_ON(1); 11198c2ecf20Sopenharmony_ci goto retry; 11208c2ecf20Sopenharmony_ci } 11218c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 11228c2ecf20Sopenharmony_ci goto found; 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci bud = FIRST; 11258c2ecf20Sopenharmony_ci } 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_ci page = NULL; 11288c2ecf20Sopenharmony_ci if (can_sleep) { 11298c2ecf20Sopenharmony_ci spin_lock(&pool->stale_lock); 11308c2ecf20Sopenharmony_ci zhdr = list_first_entry_or_null(&pool->stale, 11318c2ecf20Sopenharmony_ci struct z3fold_header, buddy); 11328c2ecf20Sopenharmony_ci /* 11338c2ecf20Sopenharmony_ci * Before allocating a page, let's see if we can take one from 11348c2ecf20Sopenharmony_ci * the stale pages list. cancel_work_sync() can sleep so we 11358c2ecf20Sopenharmony_ci * limit this case to the contexts where we can sleep 11368c2ecf20Sopenharmony_ci */ 11378c2ecf20Sopenharmony_ci if (zhdr) { 11388c2ecf20Sopenharmony_ci list_del(&zhdr->buddy); 11398c2ecf20Sopenharmony_ci spin_unlock(&pool->stale_lock); 11408c2ecf20Sopenharmony_ci cancel_work_sync(&zhdr->work); 11418c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 11428c2ecf20Sopenharmony_ci } else { 11438c2ecf20Sopenharmony_ci spin_unlock(&pool->stale_lock); 11448c2ecf20Sopenharmony_ci } 11458c2ecf20Sopenharmony_ci } 11468c2ecf20Sopenharmony_ci if (!page) 11478c2ecf20Sopenharmony_ci page = alloc_page(gfp); 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci if (!page) 11508c2ecf20Sopenharmony_ci return -ENOMEM; 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp); 11538c2ecf20Sopenharmony_ci if (!zhdr) { 11548c2ecf20Sopenharmony_ci __free_page(page); 11558c2ecf20Sopenharmony_ci return -ENOMEM; 11568c2ecf20Sopenharmony_ci } 11578c2ecf20Sopenharmony_ci atomic64_inc(&pool->pages_nr); 11588c2ecf20Sopenharmony_ci 11598c2ecf20Sopenharmony_ci if (bud == HEADLESS) { 11608c2ecf20Sopenharmony_ci set_bit(PAGE_HEADLESS, &page->private); 11618c2ecf20Sopenharmony_ci goto headless; 11628c2ecf20Sopenharmony_ci } 11638c2ecf20Sopenharmony_ci if (can_sleep) { 11648c2ecf20Sopenharmony_ci lock_page(page); 11658c2ecf20Sopenharmony_ci __SetPageMovable(page, pool->inode->i_mapping); 11668c2ecf20Sopenharmony_ci unlock_page(page); 11678c2ecf20Sopenharmony_ci } else { 11688c2ecf20Sopenharmony_ci if (trylock_page(page)) { 11698c2ecf20Sopenharmony_ci __SetPageMovable(page, pool->inode->i_mapping); 11708c2ecf20Sopenharmony_ci unlock_page(page); 11718c2ecf20Sopenharmony_ci } 11728c2ecf20Sopenharmony_ci } 11738c2ecf20Sopenharmony_ci z3fold_page_lock(zhdr); 11748c2ecf20Sopenharmony_ci 11758c2ecf20Sopenharmony_cifound: 11768c2ecf20Sopenharmony_ci if (bud == FIRST) 11778c2ecf20Sopenharmony_ci zhdr->first_chunks = chunks; 11788c2ecf20Sopenharmony_ci else if (bud == LAST) 11798c2ecf20Sopenharmony_ci zhdr->last_chunks = chunks; 11808c2ecf20Sopenharmony_ci else { 11818c2ecf20Sopenharmony_ci zhdr->middle_chunks = chunks; 11828c2ecf20Sopenharmony_ci zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 11838c2ecf20Sopenharmony_ci } 11848c2ecf20Sopenharmony_ci add_to_unbuddied(pool, zhdr); 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ciheadless: 11878c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 11888c2ecf20Sopenharmony_ci /* Add/move z3fold page to beginning of LRU */ 11898c2ecf20Sopenharmony_ci if (!list_empty(&page->lru)) 11908c2ecf20Sopenharmony_ci list_del(&page->lru); 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci list_add(&page->lru, &pool->lru); 11938c2ecf20Sopenharmony_ci 11948c2ecf20Sopenharmony_ci *handle = encode_handle(zhdr, bud); 11958c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 11968c2ecf20Sopenharmony_ci if (bud != HEADLESS) 11978c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci return 0; 12008c2ecf20Sopenharmony_ci} 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci/** 12038c2ecf20Sopenharmony_ci * z3fold_free() - frees the allocation associated with the given handle 12048c2ecf20Sopenharmony_ci * @pool: pool in which the allocation resided 12058c2ecf20Sopenharmony_ci * @handle: handle associated with the allocation returned by z3fold_alloc() 12068c2ecf20Sopenharmony_ci * 12078c2ecf20Sopenharmony_ci * In the case that the z3fold page in which the allocation resides is under 12088c2ecf20Sopenharmony_ci * reclaim, as indicated by the PG_reclaim flag being set, this function 12098c2ecf20Sopenharmony_ci * only sets the first|last_chunks to 0. The page is actually freed 12108c2ecf20Sopenharmony_ci * once both buddies are evicted (see z3fold_reclaim_page() below). 12118c2ecf20Sopenharmony_ci */ 12128c2ecf20Sopenharmony_cistatic void z3fold_free(struct z3fold_pool *pool, unsigned long handle) 12138c2ecf20Sopenharmony_ci{ 12148c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 12158c2ecf20Sopenharmony_ci struct page *page; 12168c2ecf20Sopenharmony_ci enum buddy bud; 12178c2ecf20Sopenharmony_ci bool page_claimed; 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci zhdr = get_z3fold_header(handle); 12208c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 12218c2ecf20Sopenharmony_ci page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private); 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) { 12248c2ecf20Sopenharmony_ci /* if a headless page is under reclaim, just leave. 12258c2ecf20Sopenharmony_ci * NB: we use test_and_set_bit for a reason: if the bit 12268c2ecf20Sopenharmony_ci * has not been set before, we release this page 12278c2ecf20Sopenharmony_ci * immediately so we don't care about its value any more. 12288c2ecf20Sopenharmony_ci */ 12298c2ecf20Sopenharmony_ci if (!page_claimed) { 12308c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 12318c2ecf20Sopenharmony_ci list_del(&page->lru); 12328c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 12338c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 12348c2ecf20Sopenharmony_ci free_z3fold_page(page, true); 12358c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 12368c2ecf20Sopenharmony_ci } 12378c2ecf20Sopenharmony_ci return; 12388c2ecf20Sopenharmony_ci } 12398c2ecf20Sopenharmony_ci 12408c2ecf20Sopenharmony_ci /* Non-headless case */ 12418c2ecf20Sopenharmony_ci bud = handle_to_buddy(handle); 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci switch (bud) { 12448c2ecf20Sopenharmony_ci case FIRST: 12458c2ecf20Sopenharmony_ci zhdr->first_chunks = 0; 12468c2ecf20Sopenharmony_ci break; 12478c2ecf20Sopenharmony_ci case MIDDLE: 12488c2ecf20Sopenharmony_ci zhdr->middle_chunks = 0; 12498c2ecf20Sopenharmony_ci break; 12508c2ecf20Sopenharmony_ci case LAST: 12518c2ecf20Sopenharmony_ci zhdr->last_chunks = 0; 12528c2ecf20Sopenharmony_ci break; 12538c2ecf20Sopenharmony_ci default: 12548c2ecf20Sopenharmony_ci pr_err("%s: unknown bud %d\n", __func__, bud); 12558c2ecf20Sopenharmony_ci WARN_ON(1); 12568c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 12578c2ecf20Sopenharmony_ci return; 12588c2ecf20Sopenharmony_ci } 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci if (!page_claimed) 12618c2ecf20Sopenharmony_ci free_handle(handle, zhdr); 12628c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { 12638c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 12648c2ecf20Sopenharmony_ci return; 12658c2ecf20Sopenharmony_ci } 12668c2ecf20Sopenharmony_ci if (page_claimed) { 12678c2ecf20Sopenharmony_ci /* the page has not been claimed by us */ 12688c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 12698c2ecf20Sopenharmony_ci return; 12708c2ecf20Sopenharmony_ci } 12718c2ecf20Sopenharmony_ci if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 12728c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 12738c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 12748c2ecf20Sopenharmony_ci return; 12758c2ecf20Sopenharmony_ci } 12768c2ecf20Sopenharmony_ci if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { 12778c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 12788c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 12798c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 12808c2ecf20Sopenharmony_ci zhdr->cpu = -1; 12818c2ecf20Sopenharmony_ci kref_get(&zhdr->refcount); 12828c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 12838c2ecf20Sopenharmony_ci do_compact_page(zhdr, true); 12848c2ecf20Sopenharmony_ci return; 12858c2ecf20Sopenharmony_ci } 12868c2ecf20Sopenharmony_ci kref_get(&zhdr->refcount); 12878c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 12888c2ecf20Sopenharmony_ci queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); 12898c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 12908c2ecf20Sopenharmony_ci} 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_ci/** 12938c2ecf20Sopenharmony_ci * z3fold_reclaim_page() - evicts allocations from a pool page and frees it 12948c2ecf20Sopenharmony_ci * @pool: pool from which a page will attempt to be evicted 12958c2ecf20Sopenharmony_ci * @retries: number of pages on the LRU list for which eviction will 12968c2ecf20Sopenharmony_ci * be attempted before failing 12978c2ecf20Sopenharmony_ci * 12988c2ecf20Sopenharmony_ci * z3fold reclaim is different from normal system reclaim in that it is done 12998c2ecf20Sopenharmony_ci * from the bottom, up. This is because only the bottom layer, z3fold, has 13008c2ecf20Sopenharmony_ci * information on how the allocations are organized within each z3fold page. 13018c2ecf20Sopenharmony_ci * This has the potential to create interesting locking situations between 13028c2ecf20Sopenharmony_ci * z3fold and the user, however. 13038c2ecf20Sopenharmony_ci * 13048c2ecf20Sopenharmony_ci * To avoid these, this is how z3fold_reclaim_page() should be called: 13058c2ecf20Sopenharmony_ci * 13068c2ecf20Sopenharmony_ci * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). 13078c2ecf20Sopenharmony_ci * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and 13088c2ecf20Sopenharmony_ci * call the user-defined eviction handler with the pool and handle as 13098c2ecf20Sopenharmony_ci * arguments. 13108c2ecf20Sopenharmony_ci * 13118c2ecf20Sopenharmony_ci * If the handle can not be evicted, the eviction handler should return 13128c2ecf20Sopenharmony_ci * non-zero. z3fold_reclaim_page() will add the z3fold page back to the 13138c2ecf20Sopenharmony_ci * appropriate list and try the next z3fold page on the LRU up to 13148c2ecf20Sopenharmony_ci * a user defined number of retries. 13158c2ecf20Sopenharmony_ci * 13168c2ecf20Sopenharmony_ci * If the handle is successfully evicted, the eviction handler should 13178c2ecf20Sopenharmony_ci * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free() 13188c2ecf20Sopenharmony_ci * contains logic to delay freeing the page if the page is under reclaim, 13198c2ecf20Sopenharmony_ci * as indicated by the setting of the PG_reclaim flag on the underlying page. 13208c2ecf20Sopenharmony_ci * 13218c2ecf20Sopenharmony_ci * If all buddies in the z3fold page are successfully evicted, then the 13228c2ecf20Sopenharmony_ci * z3fold page can be freed. 13238c2ecf20Sopenharmony_ci * 13248c2ecf20Sopenharmony_ci * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are 13258c2ecf20Sopenharmony_ci * no pages to evict or an eviction handler is not registered, -EAGAIN if 13268c2ecf20Sopenharmony_ci * the retry limit was hit. 13278c2ecf20Sopenharmony_ci */ 13288c2ecf20Sopenharmony_cistatic int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) 13298c2ecf20Sopenharmony_ci{ 13308c2ecf20Sopenharmony_ci int i, ret = -1; 13318c2ecf20Sopenharmony_ci struct z3fold_header *zhdr = NULL; 13328c2ecf20Sopenharmony_ci struct page *page = NULL; 13338c2ecf20Sopenharmony_ci struct list_head *pos; 13348c2ecf20Sopenharmony_ci unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; 13358c2ecf20Sopenharmony_ci struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN))); 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_ci rwlock_init(&slots.lock); 13388c2ecf20Sopenharmony_ci slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE); 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 13418c2ecf20Sopenharmony_ci if (!pool->ops || !pool->ops->evict || retries == 0) { 13428c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 13438c2ecf20Sopenharmony_ci return -EINVAL; 13448c2ecf20Sopenharmony_ci } 13458c2ecf20Sopenharmony_ci for (i = 0; i < retries; i++) { 13468c2ecf20Sopenharmony_ci if (list_empty(&pool->lru)) { 13478c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 13488c2ecf20Sopenharmony_ci return -EINVAL; 13498c2ecf20Sopenharmony_ci } 13508c2ecf20Sopenharmony_ci list_for_each_prev(pos, &pool->lru) { 13518c2ecf20Sopenharmony_ci page = list_entry(pos, struct page, lru); 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci zhdr = page_address(page); 13548c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) { 13558c2ecf20Sopenharmony_ci /* 13568c2ecf20Sopenharmony_ci * For non-headless pages, we wait to do this 13578c2ecf20Sopenharmony_ci * until we have the page lock to avoid racing 13588c2ecf20Sopenharmony_ci * with __z3fold_alloc(). Headless pages don't 13598c2ecf20Sopenharmony_ci * have a lock (and __z3fold_alloc() will never 13608c2ecf20Sopenharmony_ci * see them), but we still need to test and set 13618c2ecf20Sopenharmony_ci * PAGE_CLAIMED to avoid racing with 13628c2ecf20Sopenharmony_ci * z3fold_free(), so just do it now before 13638c2ecf20Sopenharmony_ci * leaving the loop. 13648c2ecf20Sopenharmony_ci */ 13658c2ecf20Sopenharmony_ci if (test_and_set_bit(PAGE_CLAIMED, &page->private)) 13668c2ecf20Sopenharmony_ci continue; 13678c2ecf20Sopenharmony_ci 13688c2ecf20Sopenharmony_ci break; 13698c2ecf20Sopenharmony_ci } 13708c2ecf20Sopenharmony_ci 13718c2ecf20Sopenharmony_ci if (kref_get_unless_zero(&zhdr->refcount) == 0) { 13728c2ecf20Sopenharmony_ci zhdr = NULL; 13738c2ecf20Sopenharmony_ci break; 13748c2ecf20Sopenharmony_ci } 13758c2ecf20Sopenharmony_ci if (!z3fold_page_trylock(zhdr)) { 13768c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, 13778c2ecf20Sopenharmony_ci release_z3fold_page)) 13788c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 13798c2ecf20Sopenharmony_ci zhdr = NULL; 13808c2ecf20Sopenharmony_ci continue; /* can't evict at this point */ 13818c2ecf20Sopenharmony_ci } 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci /* test_and_set_bit is of course atomic, but we still 13848c2ecf20Sopenharmony_ci * need to do it under page lock, otherwise checking 13858c2ecf20Sopenharmony_ci * that bit in __z3fold_alloc wouldn't make sense 13868c2ecf20Sopenharmony_ci */ 13878c2ecf20Sopenharmony_ci if (zhdr->foreign_handles || 13888c2ecf20Sopenharmony_ci test_and_set_bit(PAGE_CLAIMED, &page->private)) { 13898c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, 13908c2ecf20Sopenharmony_ci release_z3fold_page_locked)) 13918c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 13928c2ecf20Sopenharmony_ci else 13938c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 13948c2ecf20Sopenharmony_ci zhdr = NULL; 13958c2ecf20Sopenharmony_ci continue; /* can't evict such page */ 13968c2ecf20Sopenharmony_ci } 13978c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 13988c2ecf20Sopenharmony_ci zhdr->cpu = -1; 13998c2ecf20Sopenharmony_ci break; 14008c2ecf20Sopenharmony_ci } 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_ci if (!zhdr) 14038c2ecf20Sopenharmony_ci break; 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci list_del_init(&page->lru); 14068c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 14078c2ecf20Sopenharmony_ci 14088c2ecf20Sopenharmony_ci if (!test_bit(PAGE_HEADLESS, &page->private)) { 14098c2ecf20Sopenharmony_ci /* 14108c2ecf20Sopenharmony_ci * We need encode the handles before unlocking, and 14118c2ecf20Sopenharmony_ci * use our local slots structure because z3fold_free 14128c2ecf20Sopenharmony_ci * can zero out zhdr->slots and we can't do much 14138c2ecf20Sopenharmony_ci * about that 14148c2ecf20Sopenharmony_ci */ 14158c2ecf20Sopenharmony_ci first_handle = 0; 14168c2ecf20Sopenharmony_ci last_handle = 0; 14178c2ecf20Sopenharmony_ci middle_handle = 0; 14188c2ecf20Sopenharmony_ci memset(slots.slot, 0, sizeof(slots.slot)); 14198c2ecf20Sopenharmony_ci if (zhdr->first_chunks) 14208c2ecf20Sopenharmony_ci first_handle = __encode_handle(zhdr, &slots, 14218c2ecf20Sopenharmony_ci FIRST); 14228c2ecf20Sopenharmony_ci if (zhdr->middle_chunks) 14238c2ecf20Sopenharmony_ci middle_handle = __encode_handle(zhdr, &slots, 14248c2ecf20Sopenharmony_ci MIDDLE); 14258c2ecf20Sopenharmony_ci if (zhdr->last_chunks) 14268c2ecf20Sopenharmony_ci last_handle = __encode_handle(zhdr, &slots, 14278c2ecf20Sopenharmony_ci LAST); 14288c2ecf20Sopenharmony_ci /* 14298c2ecf20Sopenharmony_ci * it's safe to unlock here because we hold a 14308c2ecf20Sopenharmony_ci * reference to this page 14318c2ecf20Sopenharmony_ci */ 14328c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 14338c2ecf20Sopenharmony_ci } else { 14348c2ecf20Sopenharmony_ci first_handle = encode_handle(zhdr, HEADLESS); 14358c2ecf20Sopenharmony_ci last_handle = middle_handle = 0; 14368c2ecf20Sopenharmony_ci } 14378c2ecf20Sopenharmony_ci /* Issue the eviction callback(s) */ 14388c2ecf20Sopenharmony_ci if (middle_handle) { 14398c2ecf20Sopenharmony_ci ret = pool->ops->evict(pool, middle_handle); 14408c2ecf20Sopenharmony_ci if (ret) 14418c2ecf20Sopenharmony_ci goto next; 14428c2ecf20Sopenharmony_ci } 14438c2ecf20Sopenharmony_ci if (first_handle) { 14448c2ecf20Sopenharmony_ci ret = pool->ops->evict(pool, first_handle); 14458c2ecf20Sopenharmony_ci if (ret) 14468c2ecf20Sopenharmony_ci goto next; 14478c2ecf20Sopenharmony_ci } 14488c2ecf20Sopenharmony_ci if (last_handle) { 14498c2ecf20Sopenharmony_ci ret = pool->ops->evict(pool, last_handle); 14508c2ecf20Sopenharmony_ci if (ret) 14518c2ecf20Sopenharmony_ci goto next; 14528c2ecf20Sopenharmony_ci } 14538c2ecf20Sopenharmony_cinext: 14548c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) { 14558c2ecf20Sopenharmony_ci if (ret == 0) { 14568c2ecf20Sopenharmony_ci free_z3fold_page(page, true); 14578c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 14588c2ecf20Sopenharmony_ci return 0; 14598c2ecf20Sopenharmony_ci } 14608c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 14618c2ecf20Sopenharmony_ci list_add(&page->lru, &pool->lru); 14628c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 14638c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 14648c2ecf20Sopenharmony_ci } else { 14658c2ecf20Sopenharmony_ci struct z3fold_buddy_slots *slots = zhdr->slots; 14668c2ecf20Sopenharmony_ci z3fold_page_lock(zhdr); 14678c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, 14688c2ecf20Sopenharmony_ci release_z3fold_page_locked)) { 14698c2ecf20Sopenharmony_ci kmem_cache_free(pool->c_handle, slots); 14708c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 14718c2ecf20Sopenharmony_ci return 0; 14728c2ecf20Sopenharmony_ci } 14738c2ecf20Sopenharmony_ci /* 14748c2ecf20Sopenharmony_ci * if we are here, the page is still not completely 14758c2ecf20Sopenharmony_ci * free. Take the global pool lock then to be able 14768c2ecf20Sopenharmony_ci * to add it back to the lru list 14778c2ecf20Sopenharmony_ci */ 14788c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 14798c2ecf20Sopenharmony_ci list_add(&page->lru, &pool->lru); 14808c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 14818c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 14828c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 14838c2ecf20Sopenharmony_ci } 14848c2ecf20Sopenharmony_ci 14858c2ecf20Sopenharmony_ci /* We started off locked to we need to lock the pool back */ 14868c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 14878c2ecf20Sopenharmony_ci } 14888c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 14898c2ecf20Sopenharmony_ci return -EAGAIN; 14908c2ecf20Sopenharmony_ci} 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci/** 14938c2ecf20Sopenharmony_ci * z3fold_map() - maps the allocation associated with the given handle 14948c2ecf20Sopenharmony_ci * @pool: pool in which the allocation resides 14958c2ecf20Sopenharmony_ci * @handle: handle associated with the allocation to be mapped 14968c2ecf20Sopenharmony_ci * 14978c2ecf20Sopenharmony_ci * Extracts the buddy number from handle and constructs the pointer to the 14988c2ecf20Sopenharmony_ci * correct starting chunk within the page. 14998c2ecf20Sopenharmony_ci * 15008c2ecf20Sopenharmony_ci * Returns: a pointer to the mapped allocation 15018c2ecf20Sopenharmony_ci */ 15028c2ecf20Sopenharmony_cistatic void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 15038c2ecf20Sopenharmony_ci{ 15048c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 15058c2ecf20Sopenharmony_ci struct page *page; 15068c2ecf20Sopenharmony_ci void *addr; 15078c2ecf20Sopenharmony_ci enum buddy buddy; 15088c2ecf20Sopenharmony_ci 15098c2ecf20Sopenharmony_ci zhdr = get_z3fold_header(handle); 15108c2ecf20Sopenharmony_ci addr = zhdr; 15118c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 15128c2ecf20Sopenharmony_ci 15138c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 15148c2ecf20Sopenharmony_ci goto out; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci buddy = handle_to_buddy(handle); 15178c2ecf20Sopenharmony_ci switch (buddy) { 15188c2ecf20Sopenharmony_ci case FIRST: 15198c2ecf20Sopenharmony_ci addr += ZHDR_SIZE_ALIGNED; 15208c2ecf20Sopenharmony_ci break; 15218c2ecf20Sopenharmony_ci case MIDDLE: 15228c2ecf20Sopenharmony_ci addr += zhdr->start_middle << CHUNK_SHIFT; 15238c2ecf20Sopenharmony_ci set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 15248c2ecf20Sopenharmony_ci break; 15258c2ecf20Sopenharmony_ci case LAST: 15268c2ecf20Sopenharmony_ci addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); 15278c2ecf20Sopenharmony_ci break; 15288c2ecf20Sopenharmony_ci default: 15298c2ecf20Sopenharmony_ci pr_err("unknown buddy id %d\n", buddy); 15308c2ecf20Sopenharmony_ci WARN_ON(1); 15318c2ecf20Sopenharmony_ci addr = NULL; 15328c2ecf20Sopenharmony_ci break; 15338c2ecf20Sopenharmony_ci } 15348c2ecf20Sopenharmony_ci 15358c2ecf20Sopenharmony_ci if (addr) 15368c2ecf20Sopenharmony_ci zhdr->mapped_count++; 15378c2ecf20Sopenharmony_ciout: 15388c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 15398c2ecf20Sopenharmony_ci return addr; 15408c2ecf20Sopenharmony_ci} 15418c2ecf20Sopenharmony_ci 15428c2ecf20Sopenharmony_ci/** 15438c2ecf20Sopenharmony_ci * z3fold_unmap() - unmaps the allocation associated with the given handle 15448c2ecf20Sopenharmony_ci * @pool: pool in which the allocation resides 15458c2ecf20Sopenharmony_ci * @handle: handle associated with the allocation to be unmapped 15468c2ecf20Sopenharmony_ci */ 15478c2ecf20Sopenharmony_cistatic void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 15488c2ecf20Sopenharmony_ci{ 15498c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 15508c2ecf20Sopenharmony_ci struct page *page; 15518c2ecf20Sopenharmony_ci enum buddy buddy; 15528c2ecf20Sopenharmony_ci 15538c2ecf20Sopenharmony_ci zhdr = get_z3fold_header(handle); 15548c2ecf20Sopenharmony_ci page = virt_to_page(zhdr); 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 15578c2ecf20Sopenharmony_ci return; 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci buddy = handle_to_buddy(handle); 15608c2ecf20Sopenharmony_ci if (buddy == MIDDLE) 15618c2ecf20Sopenharmony_ci clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 15628c2ecf20Sopenharmony_ci zhdr->mapped_count--; 15638c2ecf20Sopenharmony_ci put_z3fold_header(zhdr); 15648c2ecf20Sopenharmony_ci} 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci/** 15678c2ecf20Sopenharmony_ci * z3fold_get_pool_size() - gets the z3fold pool size in pages 15688c2ecf20Sopenharmony_ci * @pool: pool whose size is being queried 15698c2ecf20Sopenharmony_ci * 15708c2ecf20Sopenharmony_ci * Returns: size in pages of the given pool. 15718c2ecf20Sopenharmony_ci */ 15728c2ecf20Sopenharmony_cistatic u64 z3fold_get_pool_size(struct z3fold_pool *pool) 15738c2ecf20Sopenharmony_ci{ 15748c2ecf20Sopenharmony_ci return atomic64_read(&pool->pages_nr); 15758c2ecf20Sopenharmony_ci} 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_cistatic bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) 15788c2ecf20Sopenharmony_ci{ 15798c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 15808c2ecf20Sopenharmony_ci struct z3fold_pool *pool; 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageMovable(page), page); 15838c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageIsolated(page), page); 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 15868c2ecf20Sopenharmony_ci return false; 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_ci zhdr = page_address(page); 15898c2ecf20Sopenharmony_ci z3fold_page_lock(zhdr); 15908c2ecf20Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 15918c2ecf20Sopenharmony_ci test_bit(PAGE_STALE, &page->private)) 15928c2ecf20Sopenharmony_ci goto out; 15938c2ecf20Sopenharmony_ci 15948c2ecf20Sopenharmony_ci if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) 15958c2ecf20Sopenharmony_ci goto out; 15968c2ecf20Sopenharmony_ci 15978c2ecf20Sopenharmony_ci if (test_and_set_bit(PAGE_CLAIMED, &page->private)) 15988c2ecf20Sopenharmony_ci goto out; 15998c2ecf20Sopenharmony_ci pool = zhdr_to_pool(zhdr); 16008c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 16018c2ecf20Sopenharmony_ci if (!list_empty(&zhdr->buddy)) 16028c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 16038c2ecf20Sopenharmony_ci if (!list_empty(&page->lru)) 16048c2ecf20Sopenharmony_ci list_del_init(&page->lru); 16058c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 16068c2ecf20Sopenharmony_ci 16078c2ecf20Sopenharmony_ci kref_get(&zhdr->refcount); 16088c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 16098c2ecf20Sopenharmony_ci return true; 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ciout: 16128c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 16138c2ecf20Sopenharmony_ci return false; 16148c2ecf20Sopenharmony_ci} 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_cistatic int z3fold_page_migrate(struct address_space *mapping, struct page *newpage, 16178c2ecf20Sopenharmony_ci struct page *page, enum migrate_mode mode) 16188c2ecf20Sopenharmony_ci{ 16198c2ecf20Sopenharmony_ci struct z3fold_header *zhdr, *new_zhdr; 16208c2ecf20Sopenharmony_ci struct z3fold_pool *pool; 16218c2ecf20Sopenharmony_ci struct address_space *new_mapping; 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageMovable(page), page); 16248c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageIsolated(page), page); 16258c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page); 16268c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); 16278c2ecf20Sopenharmony_ci 16288c2ecf20Sopenharmony_ci zhdr = page_address(page); 16298c2ecf20Sopenharmony_ci pool = zhdr_to_pool(zhdr); 16308c2ecf20Sopenharmony_ci 16318c2ecf20Sopenharmony_ci if (!z3fold_page_trylock(zhdr)) 16328c2ecf20Sopenharmony_ci return -EAGAIN; 16338c2ecf20Sopenharmony_ci if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) { 16348c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 16358c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 16368c2ecf20Sopenharmony_ci return -EBUSY; 16378c2ecf20Sopenharmony_ci } 16388c2ecf20Sopenharmony_ci if (work_pending(&zhdr->work)) { 16398c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 16408c2ecf20Sopenharmony_ci return -EAGAIN; 16418c2ecf20Sopenharmony_ci } 16428c2ecf20Sopenharmony_ci new_zhdr = page_address(newpage); 16438c2ecf20Sopenharmony_ci memcpy(new_zhdr, zhdr, PAGE_SIZE); 16448c2ecf20Sopenharmony_ci newpage->private = page->private; 16458c2ecf20Sopenharmony_ci page->private = 0; 16468c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 16478c2ecf20Sopenharmony_ci spin_lock_init(&new_zhdr->page_lock); 16488c2ecf20Sopenharmony_ci INIT_WORK(&new_zhdr->work, compact_page_work); 16498c2ecf20Sopenharmony_ci /* 16508c2ecf20Sopenharmony_ci * z3fold_page_isolate() ensures that new_zhdr->buddy is empty, 16518c2ecf20Sopenharmony_ci * so we only have to reinitialize it. 16528c2ecf20Sopenharmony_ci */ 16538c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&new_zhdr->buddy); 16548c2ecf20Sopenharmony_ci new_mapping = page_mapping(page); 16558c2ecf20Sopenharmony_ci __ClearPageMovable(page); 16568c2ecf20Sopenharmony_ci ClearPagePrivate(page); 16578c2ecf20Sopenharmony_ci 16588c2ecf20Sopenharmony_ci get_page(newpage); 16598c2ecf20Sopenharmony_ci z3fold_page_lock(new_zhdr); 16608c2ecf20Sopenharmony_ci if (new_zhdr->first_chunks) 16618c2ecf20Sopenharmony_ci encode_handle(new_zhdr, FIRST); 16628c2ecf20Sopenharmony_ci if (new_zhdr->last_chunks) 16638c2ecf20Sopenharmony_ci encode_handle(new_zhdr, LAST); 16648c2ecf20Sopenharmony_ci if (new_zhdr->middle_chunks) 16658c2ecf20Sopenharmony_ci encode_handle(new_zhdr, MIDDLE); 16668c2ecf20Sopenharmony_ci set_bit(NEEDS_COMPACTING, &newpage->private); 16678c2ecf20Sopenharmony_ci new_zhdr->cpu = smp_processor_id(); 16688c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 16698c2ecf20Sopenharmony_ci list_add(&newpage->lru, &pool->lru); 16708c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 16718c2ecf20Sopenharmony_ci __SetPageMovable(newpage, new_mapping); 16728c2ecf20Sopenharmony_ci z3fold_page_unlock(new_zhdr); 16738c2ecf20Sopenharmony_ci 16748c2ecf20Sopenharmony_ci queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci page_mapcount_reset(page); 16778c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 16788c2ecf20Sopenharmony_ci put_page(page); 16798c2ecf20Sopenharmony_ci return 0; 16808c2ecf20Sopenharmony_ci} 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_cistatic void z3fold_page_putback(struct page *page) 16838c2ecf20Sopenharmony_ci{ 16848c2ecf20Sopenharmony_ci struct z3fold_header *zhdr; 16858c2ecf20Sopenharmony_ci struct z3fold_pool *pool; 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci zhdr = page_address(page); 16888c2ecf20Sopenharmony_ci pool = zhdr_to_pool(zhdr); 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_ci z3fold_page_lock(zhdr); 16918c2ecf20Sopenharmony_ci if (!list_empty(&zhdr->buddy)) 16928c2ecf20Sopenharmony_ci list_del_init(&zhdr->buddy); 16938c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&page->lru); 16948c2ecf20Sopenharmony_ci if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { 16958c2ecf20Sopenharmony_ci atomic64_dec(&pool->pages_nr); 16968c2ecf20Sopenharmony_ci return; 16978c2ecf20Sopenharmony_ci } 16988c2ecf20Sopenharmony_ci spin_lock(&pool->lock); 16998c2ecf20Sopenharmony_ci list_add(&page->lru, &pool->lru); 17008c2ecf20Sopenharmony_ci spin_unlock(&pool->lock); 17018c2ecf20Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 17028c2ecf20Sopenharmony_ci z3fold_page_unlock(zhdr); 17038c2ecf20Sopenharmony_ci} 17048c2ecf20Sopenharmony_ci 17058c2ecf20Sopenharmony_cistatic const struct address_space_operations z3fold_aops = { 17068c2ecf20Sopenharmony_ci .isolate_page = z3fold_page_isolate, 17078c2ecf20Sopenharmony_ci .migratepage = z3fold_page_migrate, 17088c2ecf20Sopenharmony_ci .putback_page = z3fold_page_putback, 17098c2ecf20Sopenharmony_ci}; 17108c2ecf20Sopenharmony_ci 17118c2ecf20Sopenharmony_ci/***************** 17128c2ecf20Sopenharmony_ci * zpool 17138c2ecf20Sopenharmony_ci ****************/ 17148c2ecf20Sopenharmony_ci 17158c2ecf20Sopenharmony_cistatic int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle) 17168c2ecf20Sopenharmony_ci{ 17178c2ecf20Sopenharmony_ci if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) 17188c2ecf20Sopenharmony_ci return pool->zpool_ops->evict(pool->zpool, handle); 17198c2ecf20Sopenharmony_ci else 17208c2ecf20Sopenharmony_ci return -ENOENT; 17218c2ecf20Sopenharmony_ci} 17228c2ecf20Sopenharmony_ci 17238c2ecf20Sopenharmony_cistatic const struct z3fold_ops z3fold_zpool_ops = { 17248c2ecf20Sopenharmony_ci .evict = z3fold_zpool_evict 17258c2ecf20Sopenharmony_ci}; 17268c2ecf20Sopenharmony_ci 17278c2ecf20Sopenharmony_cistatic void *z3fold_zpool_create(const char *name, gfp_t gfp, 17288c2ecf20Sopenharmony_ci const struct zpool_ops *zpool_ops, 17298c2ecf20Sopenharmony_ci struct zpool *zpool) 17308c2ecf20Sopenharmony_ci{ 17318c2ecf20Sopenharmony_ci struct z3fold_pool *pool; 17328c2ecf20Sopenharmony_ci 17338c2ecf20Sopenharmony_ci pool = z3fold_create_pool(name, gfp, 17348c2ecf20Sopenharmony_ci zpool_ops ? &z3fold_zpool_ops : NULL); 17358c2ecf20Sopenharmony_ci if (pool) { 17368c2ecf20Sopenharmony_ci pool->zpool = zpool; 17378c2ecf20Sopenharmony_ci pool->zpool_ops = zpool_ops; 17388c2ecf20Sopenharmony_ci } 17398c2ecf20Sopenharmony_ci return pool; 17408c2ecf20Sopenharmony_ci} 17418c2ecf20Sopenharmony_ci 17428c2ecf20Sopenharmony_cistatic void z3fold_zpool_destroy(void *pool) 17438c2ecf20Sopenharmony_ci{ 17448c2ecf20Sopenharmony_ci z3fold_destroy_pool(pool); 17458c2ecf20Sopenharmony_ci} 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_cistatic int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, 17488c2ecf20Sopenharmony_ci unsigned long *handle) 17498c2ecf20Sopenharmony_ci{ 17508c2ecf20Sopenharmony_ci return z3fold_alloc(pool, size, gfp, handle); 17518c2ecf20Sopenharmony_ci} 17528c2ecf20Sopenharmony_cistatic void z3fold_zpool_free(void *pool, unsigned long handle) 17538c2ecf20Sopenharmony_ci{ 17548c2ecf20Sopenharmony_ci z3fold_free(pool, handle); 17558c2ecf20Sopenharmony_ci} 17568c2ecf20Sopenharmony_ci 17578c2ecf20Sopenharmony_cistatic int z3fold_zpool_shrink(void *pool, unsigned int pages, 17588c2ecf20Sopenharmony_ci unsigned int *reclaimed) 17598c2ecf20Sopenharmony_ci{ 17608c2ecf20Sopenharmony_ci unsigned int total = 0; 17618c2ecf20Sopenharmony_ci int ret = -EINVAL; 17628c2ecf20Sopenharmony_ci 17638c2ecf20Sopenharmony_ci while (total < pages) { 17648c2ecf20Sopenharmony_ci ret = z3fold_reclaim_page(pool, 8); 17658c2ecf20Sopenharmony_ci if (ret < 0) 17668c2ecf20Sopenharmony_ci break; 17678c2ecf20Sopenharmony_ci total++; 17688c2ecf20Sopenharmony_ci } 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci if (reclaimed) 17718c2ecf20Sopenharmony_ci *reclaimed = total; 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci return ret; 17748c2ecf20Sopenharmony_ci} 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_cistatic void *z3fold_zpool_map(void *pool, unsigned long handle, 17778c2ecf20Sopenharmony_ci enum zpool_mapmode mm) 17788c2ecf20Sopenharmony_ci{ 17798c2ecf20Sopenharmony_ci return z3fold_map(pool, handle); 17808c2ecf20Sopenharmony_ci} 17818c2ecf20Sopenharmony_cistatic void z3fold_zpool_unmap(void *pool, unsigned long handle) 17828c2ecf20Sopenharmony_ci{ 17838c2ecf20Sopenharmony_ci z3fold_unmap(pool, handle); 17848c2ecf20Sopenharmony_ci} 17858c2ecf20Sopenharmony_ci 17868c2ecf20Sopenharmony_cistatic u64 z3fold_zpool_total_size(void *pool) 17878c2ecf20Sopenharmony_ci{ 17888c2ecf20Sopenharmony_ci return z3fold_get_pool_size(pool) * PAGE_SIZE; 17898c2ecf20Sopenharmony_ci} 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_cistatic struct zpool_driver z3fold_zpool_driver = { 17928c2ecf20Sopenharmony_ci .type = "z3fold", 17938c2ecf20Sopenharmony_ci .sleep_mapped = true, 17948c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 17958c2ecf20Sopenharmony_ci .create = z3fold_zpool_create, 17968c2ecf20Sopenharmony_ci .destroy = z3fold_zpool_destroy, 17978c2ecf20Sopenharmony_ci .malloc = z3fold_zpool_malloc, 17988c2ecf20Sopenharmony_ci .free = z3fold_zpool_free, 17998c2ecf20Sopenharmony_ci .shrink = z3fold_zpool_shrink, 18008c2ecf20Sopenharmony_ci .map = z3fold_zpool_map, 18018c2ecf20Sopenharmony_ci .unmap = z3fold_zpool_unmap, 18028c2ecf20Sopenharmony_ci .total_size = z3fold_zpool_total_size, 18038c2ecf20Sopenharmony_ci}; 18048c2ecf20Sopenharmony_ci 18058c2ecf20Sopenharmony_ciMODULE_ALIAS("zpool-z3fold"); 18068c2ecf20Sopenharmony_ci 18078c2ecf20Sopenharmony_cistatic int __init init_z3fold(void) 18088c2ecf20Sopenharmony_ci{ 18098c2ecf20Sopenharmony_ci int ret; 18108c2ecf20Sopenharmony_ci 18118c2ecf20Sopenharmony_ci /* Make sure the z3fold header is not larger than the page size */ 18128c2ecf20Sopenharmony_ci BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE); 18138c2ecf20Sopenharmony_ci ret = z3fold_mount(); 18148c2ecf20Sopenharmony_ci if (ret) 18158c2ecf20Sopenharmony_ci return ret; 18168c2ecf20Sopenharmony_ci 18178c2ecf20Sopenharmony_ci zpool_register_driver(&z3fold_zpool_driver); 18188c2ecf20Sopenharmony_ci 18198c2ecf20Sopenharmony_ci return 0; 18208c2ecf20Sopenharmony_ci} 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_cistatic void __exit exit_z3fold(void) 18238c2ecf20Sopenharmony_ci{ 18248c2ecf20Sopenharmony_ci z3fold_unmount(); 18258c2ecf20Sopenharmony_ci zpool_unregister_driver(&z3fold_zpool_driver); 18268c2ecf20Sopenharmony_ci} 18278c2ecf20Sopenharmony_ci 18288c2ecf20Sopenharmony_cimodule_init(init_z3fold); 18298c2ecf20Sopenharmony_cimodule_exit(exit_z3fold); 18308c2ecf20Sopenharmony_ci 18318c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 18328c2ecf20Sopenharmony_ciMODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>"); 18338c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages"); 1834