162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * z3fold.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author: Vitaly Wool <vitaly.wool@konsulko.com> 662306a36Sopenharmony_ci * Copyright (C) 2016, Sony Mobile Communications Inc. 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * This implementation is based on zbud written by Seth Jennings. 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * z3fold is an special purpose allocator for storing compressed pages. It 1162306a36Sopenharmony_ci * can store up to three compressed pages per page which improves the 1262306a36Sopenharmony_ci * compression ratio of zbud while retaining its main concepts (e. g. always 1362306a36Sopenharmony_ci * storing an integral number of objects per page) and simplicity. 1462306a36Sopenharmony_ci * It still has simple and deterministic reclaim properties that make it 1562306a36Sopenharmony_ci * preferable to a higher density approach (with no requirement on integral 1662306a36Sopenharmony_ci * number of object per page) when reclaim is used. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * As in zbud, pages are divided into "chunks". The size of the chunks is 1962306a36Sopenharmony_ci * fixed at compile time and is determined by NCHUNKS_ORDER below. 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * z3fold doesn't export any API and is meant to be used via zpool API. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#include <linux/atomic.h> 2762306a36Sopenharmony_ci#include <linux/sched.h> 2862306a36Sopenharmony_ci#include <linux/cpumask.h> 2962306a36Sopenharmony_ci#include <linux/list.h> 3062306a36Sopenharmony_ci#include <linux/mm.h> 3162306a36Sopenharmony_ci#include <linux/module.h> 3262306a36Sopenharmony_ci#include <linux/page-flags.h> 3362306a36Sopenharmony_ci#include <linux/migrate.h> 3462306a36Sopenharmony_ci#include <linux/node.h> 3562306a36Sopenharmony_ci#include <linux/compaction.h> 3662306a36Sopenharmony_ci#include <linux/percpu.h> 3762306a36Sopenharmony_ci#include <linux/preempt.h> 3862306a36Sopenharmony_ci#include <linux/workqueue.h> 3962306a36Sopenharmony_ci#include <linux/slab.h> 4062306a36Sopenharmony_ci#include <linux/spinlock.h> 4162306a36Sopenharmony_ci#include <linux/zpool.h> 4262306a36Sopenharmony_ci#include <linux/kmemleak.h> 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci/* 4562306a36Sopenharmony_ci * NCHUNKS_ORDER determines the internal allocation granularity, effectively 4662306a36Sopenharmony_ci * adjusting internal fragmentation. It also determines the number of 4762306a36Sopenharmony_ci * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the 4862306a36Sopenharmony_ci * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks 4962306a36Sopenharmony_ci * in the beginning of an allocated page are occupied by z3fold header, so 5062306a36Sopenharmony_ci * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), 5162306a36Sopenharmony_ci * which shows the max number of free chunks in z3fold page, also there will 5262306a36Sopenharmony_ci * be 63, or 62, respectively, freelists per pool. 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_ci#define NCHUNKS_ORDER 6 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) 5762306a36Sopenharmony_ci#define CHUNK_SIZE (1 << CHUNK_SHIFT) 5862306a36Sopenharmony_ci#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) 5962306a36Sopenharmony_ci#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) 6062306a36Sopenharmony_ci#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) 6162306a36Sopenharmony_ci#define NCHUNKS (TOTAL_CHUNKS - ZHDR_CHUNKS) 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci#define BUDDY_MASK (0x3) 6462306a36Sopenharmony_ci#define BUDDY_SHIFT 2 6562306a36Sopenharmony_ci#define SLOTS_ALIGN (0x40) 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci/***************** 6862306a36Sopenharmony_ci * Structures 6962306a36Sopenharmony_ci*****************/ 7062306a36Sopenharmony_cistruct z3fold_pool; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cienum buddy { 7362306a36Sopenharmony_ci HEADLESS = 0, 7462306a36Sopenharmony_ci FIRST, 7562306a36Sopenharmony_ci MIDDLE, 7662306a36Sopenharmony_ci LAST, 7762306a36Sopenharmony_ci BUDDIES_MAX = LAST 7862306a36Sopenharmony_ci}; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistruct z3fold_buddy_slots { 8162306a36Sopenharmony_ci /* 8262306a36Sopenharmony_ci * we are using BUDDY_MASK in handle_to_buddy etc. so there should 8362306a36Sopenharmony_ci * be enough slots to hold all possible variants 8462306a36Sopenharmony_ci */ 8562306a36Sopenharmony_ci unsigned long slot[BUDDY_MASK + 1]; 8662306a36Sopenharmony_ci unsigned long pool; /* back link */ 8762306a36Sopenharmony_ci rwlock_t lock; 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci#define HANDLE_FLAG_MASK (0x03) 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci/* 9262306a36Sopenharmony_ci * struct z3fold_header - z3fold page metadata occupying first chunks of each 9362306a36Sopenharmony_ci * z3fold page, except for HEADLESS pages 9462306a36Sopenharmony_ci * @buddy: links the z3fold page into the relevant list in the 9562306a36Sopenharmony_ci * pool 9662306a36Sopenharmony_ci * @page_lock: per-page lock 9762306a36Sopenharmony_ci * @refcount: reference count for the z3fold page 9862306a36Sopenharmony_ci * @work: work_struct for page layout optimization 9962306a36Sopenharmony_ci * @slots: pointer to the structure holding buddy slots 10062306a36Sopenharmony_ci * @pool: pointer to the containing pool 10162306a36Sopenharmony_ci * @cpu: CPU which this page "belongs" to 10262306a36Sopenharmony_ci * @first_chunks: the size of the first buddy in chunks, 0 if free 10362306a36Sopenharmony_ci * @middle_chunks: the size of the middle buddy in chunks, 0 if free 10462306a36Sopenharmony_ci * @last_chunks: the size of the last buddy in chunks, 0 if free 10562306a36Sopenharmony_ci * @first_num: the starting number (for the first handle) 10662306a36Sopenharmony_ci * @mapped_count: the number of objects currently mapped 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_cistruct z3fold_header { 10962306a36Sopenharmony_ci struct list_head buddy; 11062306a36Sopenharmony_ci spinlock_t page_lock; 11162306a36Sopenharmony_ci struct kref refcount; 11262306a36Sopenharmony_ci struct work_struct work; 11362306a36Sopenharmony_ci struct z3fold_buddy_slots *slots; 11462306a36Sopenharmony_ci struct z3fold_pool *pool; 11562306a36Sopenharmony_ci short cpu; 11662306a36Sopenharmony_ci unsigned short first_chunks; 11762306a36Sopenharmony_ci unsigned short middle_chunks; 11862306a36Sopenharmony_ci unsigned short last_chunks; 11962306a36Sopenharmony_ci unsigned short start_middle; 12062306a36Sopenharmony_ci unsigned short first_num:2; 12162306a36Sopenharmony_ci unsigned short mapped_count:2; 12262306a36Sopenharmony_ci unsigned short foreign_handles:2; 12362306a36Sopenharmony_ci}; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci/** 12662306a36Sopenharmony_ci * struct z3fold_pool - stores metadata for each z3fold pool 12762306a36Sopenharmony_ci * @name: pool name 12862306a36Sopenharmony_ci * @lock: protects pool unbuddied lists 12962306a36Sopenharmony_ci * @stale_lock: protects pool stale page list 13062306a36Sopenharmony_ci * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 13162306a36Sopenharmony_ci * buddies; the list each z3fold page is added to depends on 13262306a36Sopenharmony_ci * the size of its free region. 13362306a36Sopenharmony_ci * @stale: list of pages marked for freeing 13462306a36Sopenharmony_ci * @pages_nr: number of z3fold pages in the pool. 13562306a36Sopenharmony_ci * @c_handle: cache for z3fold_buddy_slots allocation 13662306a36Sopenharmony_ci * @compact_wq: workqueue for page layout background optimization 13762306a36Sopenharmony_ci * @release_wq: workqueue for safe page release 13862306a36Sopenharmony_ci * @work: work_struct for safe page release 13962306a36Sopenharmony_ci * 14062306a36Sopenharmony_ci * This structure is allocated at pool creation time and maintains metadata 14162306a36Sopenharmony_ci * pertaining to a particular z3fold pool. 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_cistruct z3fold_pool { 14462306a36Sopenharmony_ci const char *name; 14562306a36Sopenharmony_ci spinlock_t lock; 14662306a36Sopenharmony_ci spinlock_t stale_lock; 14762306a36Sopenharmony_ci struct list_head *unbuddied; 14862306a36Sopenharmony_ci struct list_head stale; 14962306a36Sopenharmony_ci atomic64_t pages_nr; 15062306a36Sopenharmony_ci struct kmem_cache *c_handle; 15162306a36Sopenharmony_ci struct workqueue_struct *compact_wq; 15262306a36Sopenharmony_ci struct workqueue_struct *release_wq; 15362306a36Sopenharmony_ci struct work_struct work; 15462306a36Sopenharmony_ci}; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci/* 15762306a36Sopenharmony_ci * Internal z3fold page flags 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_cienum z3fold_page_flags { 16062306a36Sopenharmony_ci PAGE_HEADLESS = 0, 16162306a36Sopenharmony_ci MIDDLE_CHUNK_MAPPED, 16262306a36Sopenharmony_ci NEEDS_COMPACTING, 16362306a36Sopenharmony_ci PAGE_STALE, 16462306a36Sopenharmony_ci PAGE_CLAIMED, /* by either reclaim or free */ 16562306a36Sopenharmony_ci PAGE_MIGRATED, /* page is migrated and soon to be released */ 16662306a36Sopenharmony_ci}; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci/* 16962306a36Sopenharmony_ci * handle flags, go under HANDLE_FLAG_MASK 17062306a36Sopenharmony_ci */ 17162306a36Sopenharmony_cienum z3fold_handle_flags { 17262306a36Sopenharmony_ci HANDLES_NOFREE = 0, 17362306a36Sopenharmony_ci}; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci/* 17662306a36Sopenharmony_ci * Forward declarations 17762306a36Sopenharmony_ci */ 17862306a36Sopenharmony_cistatic struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool); 17962306a36Sopenharmony_cistatic void compact_page_work(struct work_struct *w); 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci/***************** 18262306a36Sopenharmony_ci * Helpers 18362306a36Sopenharmony_ci*****************/ 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci/* Converts an allocation size in bytes to size in z3fold chunks */ 18662306a36Sopenharmony_cistatic int size_to_chunks(size_t size) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci#define for_each_unbuddied_list(_iter, _begin) \ 19262306a36Sopenharmony_ci for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_cistatic inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, 19562306a36Sopenharmony_ci gfp_t gfp) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci struct z3fold_buddy_slots *slots = kmem_cache_zalloc(pool->c_handle, 19862306a36Sopenharmony_ci gfp); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci if (slots) { 20162306a36Sopenharmony_ci /* It will be freed separately in free_handle(). */ 20262306a36Sopenharmony_ci kmemleak_not_leak(slots); 20362306a36Sopenharmony_ci slots->pool = (unsigned long)pool; 20462306a36Sopenharmony_ci rwlock_init(&slots->lock); 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci return slots; 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK); 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_cistatic inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle) 21662306a36Sopenharmony_ci{ 21762306a36Sopenharmony_ci return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1)); 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci/* Lock a z3fold page */ 22162306a36Sopenharmony_cistatic inline void z3fold_page_lock(struct z3fold_header *zhdr) 22262306a36Sopenharmony_ci{ 22362306a36Sopenharmony_ci spin_lock(&zhdr->page_lock); 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci/* Try to lock a z3fold page */ 22762306a36Sopenharmony_cistatic inline int z3fold_page_trylock(struct z3fold_header *zhdr) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci return spin_trylock(&zhdr->page_lock); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci/* Unlock a z3fold page */ 23362306a36Sopenharmony_cistatic inline void z3fold_page_unlock(struct z3fold_header *zhdr) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci spin_unlock(&zhdr->page_lock); 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci/* return locked z3fold page if it's not headless */ 23962306a36Sopenharmony_cistatic inline struct z3fold_header *get_z3fold_header(unsigned long handle) 24062306a36Sopenharmony_ci{ 24162306a36Sopenharmony_ci struct z3fold_buddy_slots *slots; 24262306a36Sopenharmony_ci struct z3fold_header *zhdr; 24362306a36Sopenharmony_ci int locked = 0; 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci if (!(handle & (1 << PAGE_HEADLESS))) { 24662306a36Sopenharmony_ci slots = handle_to_slots(handle); 24762306a36Sopenharmony_ci do { 24862306a36Sopenharmony_ci unsigned long addr; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci read_lock(&slots->lock); 25162306a36Sopenharmony_ci addr = *(unsigned long *)handle; 25262306a36Sopenharmony_ci zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 25362306a36Sopenharmony_ci locked = z3fold_page_trylock(zhdr); 25462306a36Sopenharmony_ci read_unlock(&slots->lock); 25562306a36Sopenharmony_ci if (locked) { 25662306a36Sopenharmony_ci struct page *page = virt_to_page(zhdr); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci if (!test_bit(PAGE_MIGRATED, &page->private)) 25962306a36Sopenharmony_ci break; 26062306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci cpu_relax(); 26362306a36Sopenharmony_ci } while (true); 26462306a36Sopenharmony_ci } else { 26562306a36Sopenharmony_ci zhdr = (struct z3fold_header *)(handle & PAGE_MASK); 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci return zhdr; 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_cistatic inline void put_z3fold_header(struct z3fold_header *zhdr) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci struct page *page = virt_to_page(zhdr); 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci if (!test_bit(PAGE_HEADLESS, &page->private)) 27662306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 27762306a36Sopenharmony_ci} 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cistatic inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci struct z3fold_buddy_slots *slots; 28262306a36Sopenharmony_ci int i; 28362306a36Sopenharmony_ci bool is_free; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci if (WARN_ON(*(unsigned long *)handle == 0)) 28662306a36Sopenharmony_ci return; 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci slots = handle_to_slots(handle); 28962306a36Sopenharmony_ci write_lock(&slots->lock); 29062306a36Sopenharmony_ci *(unsigned long *)handle = 0; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci if (test_bit(HANDLES_NOFREE, &slots->pool)) { 29362306a36Sopenharmony_ci write_unlock(&slots->lock); 29462306a36Sopenharmony_ci return; /* simple case, nothing else to do */ 29562306a36Sopenharmony_ci } 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (zhdr->slots != slots) 29862306a36Sopenharmony_ci zhdr->foreign_handles--; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci is_free = true; 30162306a36Sopenharmony_ci for (i = 0; i <= BUDDY_MASK; i++) { 30262306a36Sopenharmony_ci if (slots->slot[i]) { 30362306a36Sopenharmony_ci is_free = false; 30462306a36Sopenharmony_ci break; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci write_unlock(&slots->lock); 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci if (is_free) { 31062306a36Sopenharmony_ci struct z3fold_pool *pool = slots_to_pool(slots); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci if (zhdr->slots == slots) 31362306a36Sopenharmony_ci zhdr->slots = NULL; 31462306a36Sopenharmony_ci kmem_cache_free(pool->c_handle, slots); 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci/* Initializes the z3fold header of a newly allocated z3fold page */ 31962306a36Sopenharmony_cistatic struct z3fold_header *init_z3fold_page(struct page *page, bool headless, 32062306a36Sopenharmony_ci struct z3fold_pool *pool, gfp_t gfp) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct z3fold_header *zhdr = page_address(page); 32362306a36Sopenharmony_ci struct z3fold_buddy_slots *slots; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci clear_bit(PAGE_HEADLESS, &page->private); 32662306a36Sopenharmony_ci clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 32762306a36Sopenharmony_ci clear_bit(NEEDS_COMPACTING, &page->private); 32862306a36Sopenharmony_ci clear_bit(PAGE_STALE, &page->private); 32962306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 33062306a36Sopenharmony_ci clear_bit(PAGE_MIGRATED, &page->private); 33162306a36Sopenharmony_ci if (headless) 33262306a36Sopenharmony_ci return zhdr; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci slots = alloc_slots(pool, gfp); 33562306a36Sopenharmony_ci if (!slots) 33662306a36Sopenharmony_ci return NULL; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci memset(zhdr, 0, sizeof(*zhdr)); 33962306a36Sopenharmony_ci spin_lock_init(&zhdr->page_lock); 34062306a36Sopenharmony_ci kref_init(&zhdr->refcount); 34162306a36Sopenharmony_ci zhdr->cpu = -1; 34262306a36Sopenharmony_ci zhdr->slots = slots; 34362306a36Sopenharmony_ci zhdr->pool = pool; 34462306a36Sopenharmony_ci INIT_LIST_HEAD(&zhdr->buddy); 34562306a36Sopenharmony_ci INIT_WORK(&zhdr->work, compact_page_work); 34662306a36Sopenharmony_ci return zhdr; 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci/* Resets the struct page fields and frees the page */ 35062306a36Sopenharmony_cistatic void free_z3fold_page(struct page *page, bool headless) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci if (!headless) { 35362306a36Sopenharmony_ci lock_page(page); 35462306a36Sopenharmony_ci __ClearPageMovable(page); 35562306a36Sopenharmony_ci unlock_page(page); 35662306a36Sopenharmony_ci } 35762306a36Sopenharmony_ci __free_page(page); 35862306a36Sopenharmony_ci} 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci/* Helper function to build the index */ 36162306a36Sopenharmony_cistatic inline int __idx(struct z3fold_header *zhdr, enum buddy bud) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci return (bud + zhdr->first_num) & BUDDY_MASK; 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci/* 36762306a36Sopenharmony_ci * Encodes the handle of a particular buddy within a z3fold page 36862306a36Sopenharmony_ci * Pool lock should be held as this function accesses first_num 36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_cistatic unsigned long __encode_handle(struct z3fold_header *zhdr, 37162306a36Sopenharmony_ci struct z3fold_buddy_slots *slots, 37262306a36Sopenharmony_ci enum buddy bud) 37362306a36Sopenharmony_ci{ 37462306a36Sopenharmony_ci unsigned long h = (unsigned long)zhdr; 37562306a36Sopenharmony_ci int idx = 0; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci /* 37862306a36Sopenharmony_ci * For a headless page, its handle is its pointer with the extra 37962306a36Sopenharmony_ci * PAGE_HEADLESS bit set 38062306a36Sopenharmony_ci */ 38162306a36Sopenharmony_ci if (bud == HEADLESS) 38262306a36Sopenharmony_ci return h | (1 << PAGE_HEADLESS); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci /* otherwise, return pointer to encoded handle */ 38562306a36Sopenharmony_ci idx = __idx(zhdr, bud); 38662306a36Sopenharmony_ci h += idx; 38762306a36Sopenharmony_ci if (bud == LAST) 38862306a36Sopenharmony_ci h |= (zhdr->last_chunks << BUDDY_SHIFT); 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci write_lock(&slots->lock); 39162306a36Sopenharmony_ci slots->slot[idx] = h; 39262306a36Sopenharmony_ci write_unlock(&slots->lock); 39362306a36Sopenharmony_ci return (unsigned long)&slots->slot[idx]; 39462306a36Sopenharmony_ci} 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_cistatic unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci return __encode_handle(zhdr, zhdr->slots, bud); 39962306a36Sopenharmony_ci} 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci/* only for LAST bud, returns zero otherwise */ 40262306a36Sopenharmony_cistatic unsigned short handle_to_chunks(unsigned long handle) 40362306a36Sopenharmony_ci{ 40462306a36Sopenharmony_ci struct z3fold_buddy_slots *slots = handle_to_slots(handle); 40562306a36Sopenharmony_ci unsigned long addr; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci read_lock(&slots->lock); 40862306a36Sopenharmony_ci addr = *(unsigned long *)handle; 40962306a36Sopenharmony_ci read_unlock(&slots->lock); 41062306a36Sopenharmony_ci return (addr & ~PAGE_MASK) >> BUDDY_SHIFT; 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci/* 41462306a36Sopenharmony_ci * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 41562306a36Sopenharmony_ci * but that doesn't matter. because the masking will result in the 41662306a36Sopenharmony_ci * correct buddy number. 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_cistatic enum buddy handle_to_buddy(unsigned long handle) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci struct z3fold_header *zhdr; 42162306a36Sopenharmony_ci struct z3fold_buddy_slots *slots = handle_to_slots(handle); 42262306a36Sopenharmony_ci unsigned long addr; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci read_lock(&slots->lock); 42562306a36Sopenharmony_ci WARN_ON(handle & (1 << PAGE_HEADLESS)); 42662306a36Sopenharmony_ci addr = *(unsigned long *)handle; 42762306a36Sopenharmony_ci read_unlock(&slots->lock); 42862306a36Sopenharmony_ci zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 42962306a36Sopenharmony_ci return (addr - zhdr->first_num) & BUDDY_MASK; 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cistatic inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci return zhdr->pool; 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_cistatic void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 43862306a36Sopenharmony_ci{ 43962306a36Sopenharmony_ci struct page *page = virt_to_page(zhdr); 44062306a36Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci WARN_ON(!list_empty(&zhdr->buddy)); 44362306a36Sopenharmony_ci set_bit(PAGE_STALE, &page->private); 44462306a36Sopenharmony_ci clear_bit(NEEDS_COMPACTING, &page->private); 44562306a36Sopenharmony_ci spin_lock(&pool->lock); 44662306a36Sopenharmony_ci spin_unlock(&pool->lock); 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci if (locked) 44962306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci spin_lock(&pool->stale_lock); 45262306a36Sopenharmony_ci list_add(&zhdr->buddy, &pool->stale); 45362306a36Sopenharmony_ci queue_work(pool->release_wq, &pool->work); 45462306a36Sopenharmony_ci spin_unlock(&pool->stale_lock); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci atomic64_dec(&pool->pages_nr); 45762306a36Sopenharmony_ci} 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_cistatic void release_z3fold_page_locked(struct kref *ref) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 46262306a36Sopenharmony_ci refcount); 46362306a36Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 46462306a36Sopenharmony_ci __release_z3fold_page(zhdr, true); 46562306a36Sopenharmony_ci} 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_cistatic void release_z3fold_page_locked_list(struct kref *ref) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 47062306a36Sopenharmony_ci refcount); 47162306a36Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci spin_lock(&pool->lock); 47462306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 47562306a36Sopenharmony_ci spin_unlock(&pool->lock); 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 47862306a36Sopenharmony_ci __release_z3fold_page(zhdr, true); 47962306a36Sopenharmony_ci} 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_cistatic inline int put_z3fold_locked(struct z3fold_header *zhdr) 48262306a36Sopenharmony_ci{ 48362306a36Sopenharmony_ci return kref_put(&zhdr->refcount, release_z3fold_page_locked); 48462306a36Sopenharmony_ci} 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_cistatic inline int put_z3fold_locked_list(struct z3fold_header *zhdr) 48762306a36Sopenharmony_ci{ 48862306a36Sopenharmony_ci return kref_put(&zhdr->refcount, release_z3fold_page_locked_list); 48962306a36Sopenharmony_ci} 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_cistatic void free_pages_work(struct work_struct *w) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci spin_lock(&pool->stale_lock); 49662306a36Sopenharmony_ci while (!list_empty(&pool->stale)) { 49762306a36Sopenharmony_ci struct z3fold_header *zhdr = list_first_entry(&pool->stale, 49862306a36Sopenharmony_ci struct z3fold_header, buddy); 49962306a36Sopenharmony_ci struct page *page = virt_to_page(zhdr); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci list_del(&zhdr->buddy); 50262306a36Sopenharmony_ci if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 50362306a36Sopenharmony_ci continue; 50462306a36Sopenharmony_ci spin_unlock(&pool->stale_lock); 50562306a36Sopenharmony_ci cancel_work_sync(&zhdr->work); 50662306a36Sopenharmony_ci free_z3fold_page(page, false); 50762306a36Sopenharmony_ci cond_resched(); 50862306a36Sopenharmony_ci spin_lock(&pool->stale_lock); 50962306a36Sopenharmony_ci } 51062306a36Sopenharmony_ci spin_unlock(&pool->stale_lock); 51162306a36Sopenharmony_ci} 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci/* 51462306a36Sopenharmony_ci * Returns the number of free chunks in a z3fold page. 51562306a36Sopenharmony_ci * NB: can't be used with HEADLESS pages. 51662306a36Sopenharmony_ci */ 51762306a36Sopenharmony_cistatic int num_free_chunks(struct z3fold_header *zhdr) 51862306a36Sopenharmony_ci{ 51962306a36Sopenharmony_ci int nfree; 52062306a36Sopenharmony_ci /* 52162306a36Sopenharmony_ci * If there is a middle object, pick up the bigger free space 52262306a36Sopenharmony_ci * either before or after it. Otherwise just subtract the number 52362306a36Sopenharmony_ci * of chunks occupied by the first and the last objects. 52462306a36Sopenharmony_ci */ 52562306a36Sopenharmony_ci if (zhdr->middle_chunks != 0) { 52662306a36Sopenharmony_ci int nfree_before = zhdr->first_chunks ? 52762306a36Sopenharmony_ci 0 : zhdr->start_middle - ZHDR_CHUNKS; 52862306a36Sopenharmony_ci int nfree_after = zhdr->last_chunks ? 52962306a36Sopenharmony_ci 0 : TOTAL_CHUNKS - 53062306a36Sopenharmony_ci (zhdr->start_middle + zhdr->middle_chunks); 53162306a36Sopenharmony_ci nfree = max(nfree_before, nfree_after); 53262306a36Sopenharmony_ci } else 53362306a36Sopenharmony_ci nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 53462306a36Sopenharmony_ci return nfree; 53562306a36Sopenharmony_ci} 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci/* Add to the appropriate unbuddied list */ 53862306a36Sopenharmony_cistatic inline void add_to_unbuddied(struct z3fold_pool *pool, 53962306a36Sopenharmony_ci struct z3fold_header *zhdr) 54062306a36Sopenharmony_ci{ 54162306a36Sopenharmony_ci if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || 54262306a36Sopenharmony_ci zhdr->middle_chunks == 0) { 54362306a36Sopenharmony_ci struct list_head *unbuddied; 54462306a36Sopenharmony_ci int freechunks = num_free_chunks(zhdr); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci migrate_disable(); 54762306a36Sopenharmony_ci unbuddied = this_cpu_ptr(pool->unbuddied); 54862306a36Sopenharmony_ci spin_lock(&pool->lock); 54962306a36Sopenharmony_ci list_add(&zhdr->buddy, &unbuddied[freechunks]); 55062306a36Sopenharmony_ci spin_unlock(&pool->lock); 55162306a36Sopenharmony_ci zhdr->cpu = smp_processor_id(); 55262306a36Sopenharmony_ci migrate_enable(); 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci} 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_cistatic inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci enum buddy bud = HEADLESS; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci if (zhdr->middle_chunks) { 56162306a36Sopenharmony_ci if (!zhdr->first_chunks && 56262306a36Sopenharmony_ci chunks <= zhdr->start_middle - ZHDR_CHUNKS) 56362306a36Sopenharmony_ci bud = FIRST; 56462306a36Sopenharmony_ci else if (!zhdr->last_chunks) 56562306a36Sopenharmony_ci bud = LAST; 56662306a36Sopenharmony_ci } else { 56762306a36Sopenharmony_ci if (!zhdr->first_chunks) 56862306a36Sopenharmony_ci bud = FIRST; 56962306a36Sopenharmony_ci else if (!zhdr->last_chunks) 57062306a36Sopenharmony_ci bud = LAST; 57162306a36Sopenharmony_ci else 57262306a36Sopenharmony_ci bud = MIDDLE; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci return bud; 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_cistatic inline void *mchunk_memmove(struct z3fold_header *zhdr, 57962306a36Sopenharmony_ci unsigned short dst_chunk) 58062306a36Sopenharmony_ci{ 58162306a36Sopenharmony_ci void *beg = zhdr; 58262306a36Sopenharmony_ci return memmove(beg + (dst_chunk << CHUNK_SHIFT), 58362306a36Sopenharmony_ci beg + (zhdr->start_middle << CHUNK_SHIFT), 58462306a36Sopenharmony_ci zhdr->middle_chunks << CHUNK_SHIFT); 58562306a36Sopenharmony_ci} 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_cistatic inline bool buddy_single(struct z3fold_header *zhdr) 58862306a36Sopenharmony_ci{ 58962306a36Sopenharmony_ci return !((zhdr->first_chunks && zhdr->middle_chunks) || 59062306a36Sopenharmony_ci (zhdr->first_chunks && zhdr->last_chunks) || 59162306a36Sopenharmony_ci (zhdr->middle_chunks && zhdr->last_chunks)); 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr) 59562306a36Sopenharmony_ci{ 59662306a36Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 59762306a36Sopenharmony_ci void *p = zhdr; 59862306a36Sopenharmony_ci unsigned long old_handle = 0; 59962306a36Sopenharmony_ci size_t sz = 0; 60062306a36Sopenharmony_ci struct z3fold_header *new_zhdr = NULL; 60162306a36Sopenharmony_ci int first_idx = __idx(zhdr, FIRST); 60262306a36Sopenharmony_ci int middle_idx = __idx(zhdr, MIDDLE); 60362306a36Sopenharmony_ci int last_idx = __idx(zhdr, LAST); 60462306a36Sopenharmony_ci unsigned short *moved_chunks = NULL; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci /* 60762306a36Sopenharmony_ci * No need to protect slots here -- all the slots are "local" and 60862306a36Sopenharmony_ci * the page lock is already taken 60962306a36Sopenharmony_ci */ 61062306a36Sopenharmony_ci if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) { 61162306a36Sopenharmony_ci p += ZHDR_SIZE_ALIGNED; 61262306a36Sopenharmony_ci sz = zhdr->first_chunks << CHUNK_SHIFT; 61362306a36Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[first_idx]; 61462306a36Sopenharmony_ci moved_chunks = &zhdr->first_chunks; 61562306a36Sopenharmony_ci } else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) { 61662306a36Sopenharmony_ci p += zhdr->start_middle << CHUNK_SHIFT; 61762306a36Sopenharmony_ci sz = zhdr->middle_chunks << CHUNK_SHIFT; 61862306a36Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[middle_idx]; 61962306a36Sopenharmony_ci moved_chunks = &zhdr->middle_chunks; 62062306a36Sopenharmony_ci } else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) { 62162306a36Sopenharmony_ci p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); 62262306a36Sopenharmony_ci sz = zhdr->last_chunks << CHUNK_SHIFT; 62362306a36Sopenharmony_ci old_handle = (unsigned long)&zhdr->slots->slot[last_idx]; 62462306a36Sopenharmony_ci moved_chunks = &zhdr->last_chunks; 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci if (sz > 0) { 62862306a36Sopenharmony_ci enum buddy new_bud = HEADLESS; 62962306a36Sopenharmony_ci short chunks = size_to_chunks(sz); 63062306a36Sopenharmony_ci void *q; 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci new_zhdr = __z3fold_alloc(pool, sz, false); 63362306a36Sopenharmony_ci if (!new_zhdr) 63462306a36Sopenharmony_ci return NULL; 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci if (WARN_ON(new_zhdr == zhdr)) 63762306a36Sopenharmony_ci goto out_fail; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci new_bud = get_free_buddy(new_zhdr, chunks); 64062306a36Sopenharmony_ci q = new_zhdr; 64162306a36Sopenharmony_ci switch (new_bud) { 64262306a36Sopenharmony_ci case FIRST: 64362306a36Sopenharmony_ci new_zhdr->first_chunks = chunks; 64462306a36Sopenharmony_ci q += ZHDR_SIZE_ALIGNED; 64562306a36Sopenharmony_ci break; 64662306a36Sopenharmony_ci case MIDDLE: 64762306a36Sopenharmony_ci new_zhdr->middle_chunks = chunks; 64862306a36Sopenharmony_ci new_zhdr->start_middle = 64962306a36Sopenharmony_ci new_zhdr->first_chunks + ZHDR_CHUNKS; 65062306a36Sopenharmony_ci q += new_zhdr->start_middle << CHUNK_SHIFT; 65162306a36Sopenharmony_ci break; 65262306a36Sopenharmony_ci case LAST: 65362306a36Sopenharmony_ci new_zhdr->last_chunks = chunks; 65462306a36Sopenharmony_ci q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT); 65562306a36Sopenharmony_ci break; 65662306a36Sopenharmony_ci default: 65762306a36Sopenharmony_ci goto out_fail; 65862306a36Sopenharmony_ci } 65962306a36Sopenharmony_ci new_zhdr->foreign_handles++; 66062306a36Sopenharmony_ci memcpy(q, p, sz); 66162306a36Sopenharmony_ci write_lock(&zhdr->slots->lock); 66262306a36Sopenharmony_ci *(unsigned long *)old_handle = (unsigned long)new_zhdr + 66362306a36Sopenharmony_ci __idx(new_zhdr, new_bud); 66462306a36Sopenharmony_ci if (new_bud == LAST) 66562306a36Sopenharmony_ci *(unsigned long *)old_handle |= 66662306a36Sopenharmony_ci (new_zhdr->last_chunks << BUDDY_SHIFT); 66762306a36Sopenharmony_ci write_unlock(&zhdr->slots->lock); 66862306a36Sopenharmony_ci add_to_unbuddied(pool, new_zhdr); 66962306a36Sopenharmony_ci z3fold_page_unlock(new_zhdr); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci *moved_chunks = 0; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci return new_zhdr; 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ciout_fail: 67762306a36Sopenharmony_ci if (new_zhdr && !put_z3fold_locked(new_zhdr)) { 67862306a36Sopenharmony_ci add_to_unbuddied(pool, new_zhdr); 67962306a36Sopenharmony_ci z3fold_page_unlock(new_zhdr); 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci return NULL; 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci} 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci#define BIG_CHUNK_GAP 3 68662306a36Sopenharmony_ci/* Has to be called with lock held */ 68762306a36Sopenharmony_cistatic int z3fold_compact_page(struct z3fold_header *zhdr) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci struct page *page = virt_to_page(zhdr); 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 69262306a36Sopenharmony_ci return 0; /* can't move middle chunk, it's used */ 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci if (unlikely(PageIsolated(page))) 69562306a36Sopenharmony_ci return 0; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci if (zhdr->middle_chunks == 0) 69862306a36Sopenharmony_ci return 0; /* nothing to compact */ 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 70162306a36Sopenharmony_ci /* move to the beginning */ 70262306a36Sopenharmony_ci mchunk_memmove(zhdr, ZHDR_CHUNKS); 70362306a36Sopenharmony_ci zhdr->first_chunks = zhdr->middle_chunks; 70462306a36Sopenharmony_ci zhdr->middle_chunks = 0; 70562306a36Sopenharmony_ci zhdr->start_middle = 0; 70662306a36Sopenharmony_ci zhdr->first_num++; 70762306a36Sopenharmony_ci return 1; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci /* 71162306a36Sopenharmony_ci * moving data is expensive, so let's only do that if 71262306a36Sopenharmony_ci * there's substantial gain (at least BIG_CHUNK_GAP chunks) 71362306a36Sopenharmony_ci */ 71462306a36Sopenharmony_ci if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 71562306a36Sopenharmony_ci zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 71662306a36Sopenharmony_ci BIG_CHUNK_GAP) { 71762306a36Sopenharmony_ci mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 71862306a36Sopenharmony_ci zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 71962306a36Sopenharmony_ci return 1; 72062306a36Sopenharmony_ci } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 72162306a36Sopenharmony_ci TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 72262306a36Sopenharmony_ci + zhdr->middle_chunks) >= 72362306a36Sopenharmony_ci BIG_CHUNK_GAP) { 72462306a36Sopenharmony_ci unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 72562306a36Sopenharmony_ci zhdr->middle_chunks; 72662306a36Sopenharmony_ci mchunk_memmove(zhdr, new_start); 72762306a36Sopenharmony_ci zhdr->start_middle = new_start; 72862306a36Sopenharmony_ci return 1; 72962306a36Sopenharmony_ci } 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci return 0; 73262306a36Sopenharmony_ci} 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_cistatic void do_compact_page(struct z3fold_header *zhdr, bool locked) 73562306a36Sopenharmony_ci{ 73662306a36Sopenharmony_ci struct z3fold_pool *pool = zhdr_to_pool(zhdr); 73762306a36Sopenharmony_ci struct page *page; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci page = virt_to_page(zhdr); 74062306a36Sopenharmony_ci if (locked) 74162306a36Sopenharmony_ci WARN_ON(z3fold_page_trylock(zhdr)); 74262306a36Sopenharmony_ci else 74362306a36Sopenharmony_ci z3fold_page_lock(zhdr); 74462306a36Sopenharmony_ci if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 74562306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 74662306a36Sopenharmony_ci return; 74762306a36Sopenharmony_ci } 74862306a36Sopenharmony_ci spin_lock(&pool->lock); 74962306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 75062306a36Sopenharmony_ci spin_unlock(&pool->lock); 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci if (put_z3fold_locked(zhdr)) 75362306a36Sopenharmony_ci return; 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci if (test_bit(PAGE_STALE, &page->private) || 75662306a36Sopenharmony_ci test_and_set_bit(PAGE_CLAIMED, &page->private)) { 75762306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 75862306a36Sopenharmony_ci return; 75962306a36Sopenharmony_ci } 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci if (!zhdr->foreign_handles && buddy_single(zhdr) && 76262306a36Sopenharmony_ci zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) { 76362306a36Sopenharmony_ci if (!put_z3fold_locked(zhdr)) { 76462306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 76562306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 76662306a36Sopenharmony_ci } 76762306a36Sopenharmony_ci return; 76862306a36Sopenharmony_ci } 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci z3fold_compact_page(zhdr); 77162306a36Sopenharmony_ci add_to_unbuddied(pool, zhdr); 77262306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 77362306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_cistatic void compact_page_work(struct work_struct *w) 77762306a36Sopenharmony_ci{ 77862306a36Sopenharmony_ci struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 77962306a36Sopenharmony_ci work); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci do_compact_page(zhdr, false); 78262306a36Sopenharmony_ci} 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci/* returns _locked_ z3fold page header or NULL */ 78562306a36Sopenharmony_cistatic inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool, 78662306a36Sopenharmony_ci size_t size, bool can_sleep) 78762306a36Sopenharmony_ci{ 78862306a36Sopenharmony_ci struct z3fold_header *zhdr = NULL; 78962306a36Sopenharmony_ci struct page *page; 79062306a36Sopenharmony_ci struct list_head *unbuddied; 79162306a36Sopenharmony_ci int chunks = size_to_chunks(size), i; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_cilookup: 79462306a36Sopenharmony_ci migrate_disable(); 79562306a36Sopenharmony_ci /* First, try to find an unbuddied z3fold page. */ 79662306a36Sopenharmony_ci unbuddied = this_cpu_ptr(pool->unbuddied); 79762306a36Sopenharmony_ci for_each_unbuddied_list(i, chunks) { 79862306a36Sopenharmony_ci struct list_head *l = &unbuddied[i]; 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci zhdr = list_first_entry_or_null(READ_ONCE(l), 80162306a36Sopenharmony_ci struct z3fold_header, buddy); 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci if (!zhdr) 80462306a36Sopenharmony_ci continue; 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci /* Re-check under lock. */ 80762306a36Sopenharmony_ci spin_lock(&pool->lock); 80862306a36Sopenharmony_ci if (unlikely(zhdr != list_first_entry(READ_ONCE(l), 80962306a36Sopenharmony_ci struct z3fold_header, buddy)) || 81062306a36Sopenharmony_ci !z3fold_page_trylock(zhdr)) { 81162306a36Sopenharmony_ci spin_unlock(&pool->lock); 81262306a36Sopenharmony_ci zhdr = NULL; 81362306a36Sopenharmony_ci migrate_enable(); 81462306a36Sopenharmony_ci if (can_sleep) 81562306a36Sopenharmony_ci cond_resched(); 81662306a36Sopenharmony_ci goto lookup; 81762306a36Sopenharmony_ci } 81862306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 81962306a36Sopenharmony_ci zhdr->cpu = -1; 82062306a36Sopenharmony_ci spin_unlock(&pool->lock); 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci page = virt_to_page(zhdr); 82362306a36Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 82462306a36Sopenharmony_ci test_bit(PAGE_CLAIMED, &page->private)) { 82562306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 82662306a36Sopenharmony_ci zhdr = NULL; 82762306a36Sopenharmony_ci migrate_enable(); 82862306a36Sopenharmony_ci if (can_sleep) 82962306a36Sopenharmony_ci cond_resched(); 83062306a36Sopenharmony_ci goto lookup; 83162306a36Sopenharmony_ci } 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci /* 83462306a36Sopenharmony_ci * this page could not be removed from its unbuddied 83562306a36Sopenharmony_ci * list while pool lock was held, and then we've taken 83662306a36Sopenharmony_ci * page lock so kref_put could not be called before 83762306a36Sopenharmony_ci * we got here, so it's safe to just call kref_get() 83862306a36Sopenharmony_ci */ 83962306a36Sopenharmony_ci kref_get(&zhdr->refcount); 84062306a36Sopenharmony_ci break; 84162306a36Sopenharmony_ci } 84262306a36Sopenharmony_ci migrate_enable(); 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci if (!zhdr) { 84562306a36Sopenharmony_ci int cpu; 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci /* look for _exact_ match on other cpus' lists */ 84862306a36Sopenharmony_ci for_each_online_cpu(cpu) { 84962306a36Sopenharmony_ci struct list_head *l; 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ci unbuddied = per_cpu_ptr(pool->unbuddied, cpu); 85262306a36Sopenharmony_ci spin_lock(&pool->lock); 85362306a36Sopenharmony_ci l = &unbuddied[chunks]; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci zhdr = list_first_entry_or_null(READ_ONCE(l), 85662306a36Sopenharmony_ci struct z3fold_header, buddy); 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci if (!zhdr || !z3fold_page_trylock(zhdr)) { 85962306a36Sopenharmony_ci spin_unlock(&pool->lock); 86062306a36Sopenharmony_ci zhdr = NULL; 86162306a36Sopenharmony_ci continue; 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 86462306a36Sopenharmony_ci zhdr->cpu = -1; 86562306a36Sopenharmony_ci spin_unlock(&pool->lock); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci page = virt_to_page(zhdr); 86862306a36Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 86962306a36Sopenharmony_ci test_bit(PAGE_CLAIMED, &page->private)) { 87062306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 87162306a36Sopenharmony_ci zhdr = NULL; 87262306a36Sopenharmony_ci if (can_sleep) 87362306a36Sopenharmony_ci cond_resched(); 87462306a36Sopenharmony_ci continue; 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci kref_get(&zhdr->refcount); 87762306a36Sopenharmony_ci break; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci } 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci if (zhdr && !zhdr->slots) { 88262306a36Sopenharmony_ci zhdr->slots = alloc_slots(pool, GFP_ATOMIC); 88362306a36Sopenharmony_ci if (!zhdr->slots) 88462306a36Sopenharmony_ci goto out_fail; 88562306a36Sopenharmony_ci } 88662306a36Sopenharmony_ci return zhdr; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ciout_fail: 88962306a36Sopenharmony_ci if (!put_z3fold_locked(zhdr)) { 89062306a36Sopenharmony_ci add_to_unbuddied(pool, zhdr); 89162306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci return NULL; 89462306a36Sopenharmony_ci} 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci/* 89762306a36Sopenharmony_ci * API Functions 89862306a36Sopenharmony_ci */ 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci/** 90162306a36Sopenharmony_ci * z3fold_create_pool() - create a new z3fold pool 90262306a36Sopenharmony_ci * @name: pool name 90362306a36Sopenharmony_ci * @gfp: gfp flags when allocating the z3fold pool structure 90462306a36Sopenharmony_ci * 90562306a36Sopenharmony_ci * Return: pointer to the new z3fold pool or NULL if the metadata allocation 90662306a36Sopenharmony_ci * failed. 90762306a36Sopenharmony_ci */ 90862306a36Sopenharmony_cistatic struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp) 90962306a36Sopenharmony_ci{ 91062306a36Sopenharmony_ci struct z3fold_pool *pool = NULL; 91162306a36Sopenharmony_ci int i, cpu; 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci pool = kzalloc(sizeof(struct z3fold_pool), gfp); 91462306a36Sopenharmony_ci if (!pool) 91562306a36Sopenharmony_ci goto out; 91662306a36Sopenharmony_ci pool->c_handle = kmem_cache_create("z3fold_handle", 91762306a36Sopenharmony_ci sizeof(struct z3fold_buddy_slots), 91862306a36Sopenharmony_ci SLOTS_ALIGN, 0, NULL); 91962306a36Sopenharmony_ci if (!pool->c_handle) 92062306a36Sopenharmony_ci goto out_c; 92162306a36Sopenharmony_ci spin_lock_init(&pool->lock); 92262306a36Sopenharmony_ci spin_lock_init(&pool->stale_lock); 92362306a36Sopenharmony_ci pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS, 92462306a36Sopenharmony_ci __alignof__(struct list_head)); 92562306a36Sopenharmony_ci if (!pool->unbuddied) 92662306a36Sopenharmony_ci goto out_pool; 92762306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 92862306a36Sopenharmony_ci struct list_head *unbuddied = 92962306a36Sopenharmony_ci per_cpu_ptr(pool->unbuddied, cpu); 93062306a36Sopenharmony_ci for_each_unbuddied_list(i, 0) 93162306a36Sopenharmony_ci INIT_LIST_HEAD(&unbuddied[i]); 93262306a36Sopenharmony_ci } 93362306a36Sopenharmony_ci INIT_LIST_HEAD(&pool->stale); 93462306a36Sopenharmony_ci atomic64_set(&pool->pages_nr, 0); 93562306a36Sopenharmony_ci pool->name = name; 93662306a36Sopenharmony_ci pool->compact_wq = create_singlethread_workqueue(pool->name); 93762306a36Sopenharmony_ci if (!pool->compact_wq) 93862306a36Sopenharmony_ci goto out_unbuddied; 93962306a36Sopenharmony_ci pool->release_wq = create_singlethread_workqueue(pool->name); 94062306a36Sopenharmony_ci if (!pool->release_wq) 94162306a36Sopenharmony_ci goto out_wq; 94262306a36Sopenharmony_ci INIT_WORK(&pool->work, free_pages_work); 94362306a36Sopenharmony_ci return pool; 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ciout_wq: 94662306a36Sopenharmony_ci destroy_workqueue(pool->compact_wq); 94762306a36Sopenharmony_ciout_unbuddied: 94862306a36Sopenharmony_ci free_percpu(pool->unbuddied); 94962306a36Sopenharmony_ciout_pool: 95062306a36Sopenharmony_ci kmem_cache_destroy(pool->c_handle); 95162306a36Sopenharmony_ciout_c: 95262306a36Sopenharmony_ci kfree(pool); 95362306a36Sopenharmony_ciout: 95462306a36Sopenharmony_ci return NULL; 95562306a36Sopenharmony_ci} 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci/** 95862306a36Sopenharmony_ci * z3fold_destroy_pool() - destroys an existing z3fold pool 95962306a36Sopenharmony_ci * @pool: the z3fold pool to be destroyed 96062306a36Sopenharmony_ci * 96162306a36Sopenharmony_ci * The pool should be emptied before this function is called. 96262306a36Sopenharmony_ci */ 96362306a36Sopenharmony_cistatic void z3fold_destroy_pool(struct z3fold_pool *pool) 96462306a36Sopenharmony_ci{ 96562306a36Sopenharmony_ci kmem_cache_destroy(pool->c_handle); 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci /* 96862306a36Sopenharmony_ci * We need to destroy pool->compact_wq before pool->release_wq, 96962306a36Sopenharmony_ci * as any pending work on pool->compact_wq will call 97062306a36Sopenharmony_ci * queue_work(pool->release_wq, &pool->work). 97162306a36Sopenharmony_ci * 97262306a36Sopenharmony_ci * There are still outstanding pages until both workqueues are drained, 97362306a36Sopenharmony_ci * so we cannot unregister migration until then. 97462306a36Sopenharmony_ci */ 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci destroy_workqueue(pool->compact_wq); 97762306a36Sopenharmony_ci destroy_workqueue(pool->release_wq); 97862306a36Sopenharmony_ci free_percpu(pool->unbuddied); 97962306a36Sopenharmony_ci kfree(pool); 98062306a36Sopenharmony_ci} 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_cistatic const struct movable_operations z3fold_mops; 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci/** 98562306a36Sopenharmony_ci * z3fold_alloc() - allocates a region of a given size 98662306a36Sopenharmony_ci * @pool: z3fold pool from which to allocate 98762306a36Sopenharmony_ci * @size: size in bytes of the desired allocation 98862306a36Sopenharmony_ci * @gfp: gfp flags used if the pool needs to grow 98962306a36Sopenharmony_ci * @handle: handle of the new allocation 99062306a36Sopenharmony_ci * 99162306a36Sopenharmony_ci * This function will attempt to find a free region in the pool large enough to 99262306a36Sopenharmony_ci * satisfy the allocation request. A search of the unbuddied lists is 99362306a36Sopenharmony_ci * performed first. If no suitable free region is found, then a new page is 99462306a36Sopenharmony_ci * allocated and added to the pool to satisfy the request. 99562306a36Sopenharmony_ci * 99662306a36Sopenharmony_ci * Return: 0 if success and handle is set, otherwise -EINVAL if the size or 99762306a36Sopenharmony_ci * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 99862306a36Sopenharmony_ci * a new page. 99962306a36Sopenharmony_ci */ 100062306a36Sopenharmony_cistatic int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, 100162306a36Sopenharmony_ci unsigned long *handle) 100262306a36Sopenharmony_ci{ 100362306a36Sopenharmony_ci int chunks = size_to_chunks(size); 100462306a36Sopenharmony_ci struct z3fold_header *zhdr = NULL; 100562306a36Sopenharmony_ci struct page *page = NULL; 100662306a36Sopenharmony_ci enum buddy bud; 100762306a36Sopenharmony_ci bool can_sleep = gfpflags_allow_blocking(gfp); 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci if (!size || (gfp & __GFP_HIGHMEM)) 101062306a36Sopenharmony_ci return -EINVAL; 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci if (size > PAGE_SIZE) 101362306a36Sopenharmony_ci return -ENOSPC; 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) 101662306a36Sopenharmony_ci bud = HEADLESS; 101762306a36Sopenharmony_ci else { 101862306a36Sopenharmony_ciretry: 101962306a36Sopenharmony_ci zhdr = __z3fold_alloc(pool, size, can_sleep); 102062306a36Sopenharmony_ci if (zhdr) { 102162306a36Sopenharmony_ci bud = get_free_buddy(zhdr, chunks); 102262306a36Sopenharmony_ci if (bud == HEADLESS) { 102362306a36Sopenharmony_ci if (!put_z3fold_locked(zhdr)) 102462306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 102562306a36Sopenharmony_ci pr_err("No free chunks in unbuddied\n"); 102662306a36Sopenharmony_ci WARN_ON(1); 102762306a36Sopenharmony_ci goto retry; 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci page = virt_to_page(zhdr); 103062306a36Sopenharmony_ci goto found; 103162306a36Sopenharmony_ci } 103262306a36Sopenharmony_ci bud = FIRST; 103362306a36Sopenharmony_ci } 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci page = alloc_page(gfp); 103662306a36Sopenharmony_ci if (!page) 103762306a36Sopenharmony_ci return -ENOMEM; 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp); 104062306a36Sopenharmony_ci if (!zhdr) { 104162306a36Sopenharmony_ci __free_page(page); 104262306a36Sopenharmony_ci return -ENOMEM; 104362306a36Sopenharmony_ci } 104462306a36Sopenharmony_ci atomic64_inc(&pool->pages_nr); 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci if (bud == HEADLESS) { 104762306a36Sopenharmony_ci set_bit(PAGE_HEADLESS, &page->private); 104862306a36Sopenharmony_ci goto headless; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci if (can_sleep) { 105162306a36Sopenharmony_ci lock_page(page); 105262306a36Sopenharmony_ci __SetPageMovable(page, &z3fold_mops); 105362306a36Sopenharmony_ci unlock_page(page); 105462306a36Sopenharmony_ci } else { 105562306a36Sopenharmony_ci WARN_ON(!trylock_page(page)); 105662306a36Sopenharmony_ci __SetPageMovable(page, &z3fold_mops); 105762306a36Sopenharmony_ci unlock_page(page); 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci z3fold_page_lock(zhdr); 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_cifound: 106262306a36Sopenharmony_ci if (bud == FIRST) 106362306a36Sopenharmony_ci zhdr->first_chunks = chunks; 106462306a36Sopenharmony_ci else if (bud == LAST) 106562306a36Sopenharmony_ci zhdr->last_chunks = chunks; 106662306a36Sopenharmony_ci else { 106762306a36Sopenharmony_ci zhdr->middle_chunks = chunks; 106862306a36Sopenharmony_ci zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 106962306a36Sopenharmony_ci } 107062306a36Sopenharmony_ci add_to_unbuddied(pool, zhdr); 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ciheadless: 107362306a36Sopenharmony_ci spin_lock(&pool->lock); 107462306a36Sopenharmony_ci *handle = encode_handle(zhdr, bud); 107562306a36Sopenharmony_ci spin_unlock(&pool->lock); 107662306a36Sopenharmony_ci if (bud != HEADLESS) 107762306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci return 0; 108062306a36Sopenharmony_ci} 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci/** 108362306a36Sopenharmony_ci * z3fold_free() - frees the allocation associated with the given handle 108462306a36Sopenharmony_ci * @pool: pool in which the allocation resided 108562306a36Sopenharmony_ci * @handle: handle associated with the allocation returned by z3fold_alloc() 108662306a36Sopenharmony_ci * 108762306a36Sopenharmony_ci * In the case that the z3fold page in which the allocation resides is under 108862306a36Sopenharmony_ci * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function 108962306a36Sopenharmony_ci * only sets the first|middle|last_chunks to 0. The page is actually freed 109062306a36Sopenharmony_ci * once all buddies are evicted (see z3fold_reclaim_page() below). 109162306a36Sopenharmony_ci */ 109262306a36Sopenharmony_cistatic void z3fold_free(struct z3fold_pool *pool, unsigned long handle) 109362306a36Sopenharmony_ci{ 109462306a36Sopenharmony_ci struct z3fold_header *zhdr; 109562306a36Sopenharmony_ci struct page *page; 109662306a36Sopenharmony_ci enum buddy bud; 109762306a36Sopenharmony_ci bool page_claimed; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci zhdr = get_z3fold_header(handle); 110062306a36Sopenharmony_ci page = virt_to_page(zhdr); 110162306a36Sopenharmony_ci page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private); 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) { 110462306a36Sopenharmony_ci /* if a headless page is under reclaim, just leave. 110562306a36Sopenharmony_ci * NB: we use test_and_set_bit for a reason: if the bit 110662306a36Sopenharmony_ci * has not been set before, we release this page 110762306a36Sopenharmony_ci * immediately so we don't care about its value any more. 110862306a36Sopenharmony_ci */ 110962306a36Sopenharmony_ci if (!page_claimed) { 111062306a36Sopenharmony_ci put_z3fold_header(zhdr); 111162306a36Sopenharmony_ci free_z3fold_page(page, true); 111262306a36Sopenharmony_ci atomic64_dec(&pool->pages_nr); 111362306a36Sopenharmony_ci } 111462306a36Sopenharmony_ci return; 111562306a36Sopenharmony_ci } 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci /* Non-headless case */ 111862306a36Sopenharmony_ci bud = handle_to_buddy(handle); 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci switch (bud) { 112162306a36Sopenharmony_ci case FIRST: 112262306a36Sopenharmony_ci zhdr->first_chunks = 0; 112362306a36Sopenharmony_ci break; 112462306a36Sopenharmony_ci case MIDDLE: 112562306a36Sopenharmony_ci zhdr->middle_chunks = 0; 112662306a36Sopenharmony_ci break; 112762306a36Sopenharmony_ci case LAST: 112862306a36Sopenharmony_ci zhdr->last_chunks = 0; 112962306a36Sopenharmony_ci break; 113062306a36Sopenharmony_ci default: 113162306a36Sopenharmony_ci pr_err("%s: unknown bud %d\n", __func__, bud); 113262306a36Sopenharmony_ci WARN_ON(1); 113362306a36Sopenharmony_ci put_z3fold_header(zhdr); 113462306a36Sopenharmony_ci return; 113562306a36Sopenharmony_ci } 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci if (!page_claimed) 113862306a36Sopenharmony_ci free_handle(handle, zhdr); 113962306a36Sopenharmony_ci if (put_z3fold_locked_list(zhdr)) 114062306a36Sopenharmony_ci return; 114162306a36Sopenharmony_ci if (page_claimed) { 114262306a36Sopenharmony_ci /* the page has not been claimed by us */ 114362306a36Sopenharmony_ci put_z3fold_header(zhdr); 114462306a36Sopenharmony_ci return; 114562306a36Sopenharmony_ci } 114662306a36Sopenharmony_ci if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 114762306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 114862306a36Sopenharmony_ci put_z3fold_header(zhdr); 114962306a36Sopenharmony_ci return; 115062306a36Sopenharmony_ci } 115162306a36Sopenharmony_ci if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { 115262306a36Sopenharmony_ci zhdr->cpu = -1; 115362306a36Sopenharmony_ci kref_get(&zhdr->refcount); 115462306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 115562306a36Sopenharmony_ci do_compact_page(zhdr, true); 115662306a36Sopenharmony_ci return; 115762306a36Sopenharmony_ci } 115862306a36Sopenharmony_ci kref_get(&zhdr->refcount); 115962306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 116062306a36Sopenharmony_ci queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); 116162306a36Sopenharmony_ci put_z3fold_header(zhdr); 116262306a36Sopenharmony_ci} 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci/** 116562306a36Sopenharmony_ci * z3fold_map() - maps the allocation associated with the given handle 116662306a36Sopenharmony_ci * @pool: pool in which the allocation resides 116762306a36Sopenharmony_ci * @handle: handle associated with the allocation to be mapped 116862306a36Sopenharmony_ci * 116962306a36Sopenharmony_ci * Extracts the buddy number from handle and constructs the pointer to the 117062306a36Sopenharmony_ci * correct starting chunk within the page. 117162306a36Sopenharmony_ci * 117262306a36Sopenharmony_ci * Returns: a pointer to the mapped allocation 117362306a36Sopenharmony_ci */ 117462306a36Sopenharmony_cistatic void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 117562306a36Sopenharmony_ci{ 117662306a36Sopenharmony_ci struct z3fold_header *zhdr; 117762306a36Sopenharmony_ci struct page *page; 117862306a36Sopenharmony_ci void *addr; 117962306a36Sopenharmony_ci enum buddy buddy; 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci zhdr = get_z3fold_header(handle); 118262306a36Sopenharmony_ci addr = zhdr; 118362306a36Sopenharmony_ci page = virt_to_page(zhdr); 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 118662306a36Sopenharmony_ci goto out; 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci buddy = handle_to_buddy(handle); 118962306a36Sopenharmony_ci switch (buddy) { 119062306a36Sopenharmony_ci case FIRST: 119162306a36Sopenharmony_ci addr += ZHDR_SIZE_ALIGNED; 119262306a36Sopenharmony_ci break; 119362306a36Sopenharmony_ci case MIDDLE: 119462306a36Sopenharmony_ci addr += zhdr->start_middle << CHUNK_SHIFT; 119562306a36Sopenharmony_ci set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 119662306a36Sopenharmony_ci break; 119762306a36Sopenharmony_ci case LAST: 119862306a36Sopenharmony_ci addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); 119962306a36Sopenharmony_ci break; 120062306a36Sopenharmony_ci default: 120162306a36Sopenharmony_ci pr_err("unknown buddy id %d\n", buddy); 120262306a36Sopenharmony_ci WARN_ON(1); 120362306a36Sopenharmony_ci addr = NULL; 120462306a36Sopenharmony_ci break; 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci if (addr) 120862306a36Sopenharmony_ci zhdr->mapped_count++; 120962306a36Sopenharmony_ciout: 121062306a36Sopenharmony_ci put_z3fold_header(zhdr); 121162306a36Sopenharmony_ci return addr; 121262306a36Sopenharmony_ci} 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci/** 121562306a36Sopenharmony_ci * z3fold_unmap() - unmaps the allocation associated with the given handle 121662306a36Sopenharmony_ci * @pool: pool in which the allocation resides 121762306a36Sopenharmony_ci * @handle: handle associated with the allocation to be unmapped 121862306a36Sopenharmony_ci */ 121962306a36Sopenharmony_cistatic void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 122062306a36Sopenharmony_ci{ 122162306a36Sopenharmony_ci struct z3fold_header *zhdr; 122262306a36Sopenharmony_ci struct page *page; 122362306a36Sopenharmony_ci enum buddy buddy; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci zhdr = get_z3fold_header(handle); 122662306a36Sopenharmony_ci page = virt_to_page(zhdr); 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 122962306a36Sopenharmony_ci return; 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci buddy = handle_to_buddy(handle); 123262306a36Sopenharmony_ci if (buddy == MIDDLE) 123362306a36Sopenharmony_ci clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 123462306a36Sopenharmony_ci zhdr->mapped_count--; 123562306a36Sopenharmony_ci put_z3fold_header(zhdr); 123662306a36Sopenharmony_ci} 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci/** 123962306a36Sopenharmony_ci * z3fold_get_pool_size() - gets the z3fold pool size in pages 124062306a36Sopenharmony_ci * @pool: pool whose size is being queried 124162306a36Sopenharmony_ci * 124262306a36Sopenharmony_ci * Returns: size in pages of the given pool. 124362306a36Sopenharmony_ci */ 124462306a36Sopenharmony_cistatic u64 z3fold_get_pool_size(struct z3fold_pool *pool) 124562306a36Sopenharmony_ci{ 124662306a36Sopenharmony_ci return atomic64_read(&pool->pages_nr); 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci struct z3fold_header *zhdr; 125262306a36Sopenharmony_ci struct z3fold_pool *pool; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci VM_BUG_ON_PAGE(PageIsolated(page), page); 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci if (test_bit(PAGE_HEADLESS, &page->private)) 125762306a36Sopenharmony_ci return false; 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci zhdr = page_address(page); 126062306a36Sopenharmony_ci z3fold_page_lock(zhdr); 126162306a36Sopenharmony_ci if (test_bit(NEEDS_COMPACTING, &page->private) || 126262306a36Sopenharmony_ci test_bit(PAGE_STALE, &page->private)) 126362306a36Sopenharmony_ci goto out; 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) 126662306a36Sopenharmony_ci goto out; 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci if (test_and_set_bit(PAGE_CLAIMED, &page->private)) 126962306a36Sopenharmony_ci goto out; 127062306a36Sopenharmony_ci pool = zhdr_to_pool(zhdr); 127162306a36Sopenharmony_ci spin_lock(&pool->lock); 127262306a36Sopenharmony_ci if (!list_empty(&zhdr->buddy)) 127362306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 127462306a36Sopenharmony_ci spin_unlock(&pool->lock); 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci kref_get(&zhdr->refcount); 127762306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 127862306a36Sopenharmony_ci return true; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ciout: 128162306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 128262306a36Sopenharmony_ci return false; 128362306a36Sopenharmony_ci} 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_cistatic int z3fold_page_migrate(struct page *newpage, struct page *page, 128662306a36Sopenharmony_ci enum migrate_mode mode) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci struct z3fold_header *zhdr, *new_zhdr; 128962306a36Sopenharmony_ci struct z3fold_pool *pool; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci VM_BUG_ON_PAGE(!PageIsolated(page), page); 129262306a36Sopenharmony_ci VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page); 129362306a36Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci zhdr = page_address(page); 129662306a36Sopenharmony_ci pool = zhdr_to_pool(zhdr); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci if (!z3fold_page_trylock(zhdr)) 129962306a36Sopenharmony_ci return -EAGAIN; 130062306a36Sopenharmony_ci if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) { 130162306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 130262306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 130362306a36Sopenharmony_ci return -EBUSY; 130462306a36Sopenharmony_ci } 130562306a36Sopenharmony_ci if (work_pending(&zhdr->work)) { 130662306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 130762306a36Sopenharmony_ci return -EAGAIN; 130862306a36Sopenharmony_ci } 130962306a36Sopenharmony_ci new_zhdr = page_address(newpage); 131062306a36Sopenharmony_ci memcpy(new_zhdr, zhdr, PAGE_SIZE); 131162306a36Sopenharmony_ci newpage->private = page->private; 131262306a36Sopenharmony_ci set_bit(PAGE_MIGRATED, &page->private); 131362306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 131462306a36Sopenharmony_ci spin_lock_init(&new_zhdr->page_lock); 131562306a36Sopenharmony_ci INIT_WORK(&new_zhdr->work, compact_page_work); 131662306a36Sopenharmony_ci /* 131762306a36Sopenharmony_ci * z3fold_page_isolate() ensures that new_zhdr->buddy is empty, 131862306a36Sopenharmony_ci * so we only have to reinitialize it. 131962306a36Sopenharmony_ci */ 132062306a36Sopenharmony_ci INIT_LIST_HEAD(&new_zhdr->buddy); 132162306a36Sopenharmony_ci __ClearPageMovable(page); 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci get_page(newpage); 132462306a36Sopenharmony_ci z3fold_page_lock(new_zhdr); 132562306a36Sopenharmony_ci if (new_zhdr->first_chunks) 132662306a36Sopenharmony_ci encode_handle(new_zhdr, FIRST); 132762306a36Sopenharmony_ci if (new_zhdr->last_chunks) 132862306a36Sopenharmony_ci encode_handle(new_zhdr, LAST); 132962306a36Sopenharmony_ci if (new_zhdr->middle_chunks) 133062306a36Sopenharmony_ci encode_handle(new_zhdr, MIDDLE); 133162306a36Sopenharmony_ci set_bit(NEEDS_COMPACTING, &newpage->private); 133262306a36Sopenharmony_ci new_zhdr->cpu = smp_processor_id(); 133362306a36Sopenharmony_ci __SetPageMovable(newpage, &z3fold_mops); 133462306a36Sopenharmony_ci z3fold_page_unlock(new_zhdr); 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci /* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */ 133962306a36Sopenharmony_ci page->private = 0; 134062306a36Sopenharmony_ci put_page(page); 134162306a36Sopenharmony_ci return 0; 134262306a36Sopenharmony_ci} 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_cistatic void z3fold_page_putback(struct page *page) 134562306a36Sopenharmony_ci{ 134662306a36Sopenharmony_ci struct z3fold_header *zhdr; 134762306a36Sopenharmony_ci struct z3fold_pool *pool; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci zhdr = page_address(page); 135062306a36Sopenharmony_ci pool = zhdr_to_pool(zhdr); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci z3fold_page_lock(zhdr); 135362306a36Sopenharmony_ci if (!list_empty(&zhdr->buddy)) 135462306a36Sopenharmony_ci list_del_init(&zhdr->buddy); 135562306a36Sopenharmony_ci INIT_LIST_HEAD(&page->lru); 135662306a36Sopenharmony_ci if (put_z3fold_locked(zhdr)) 135762306a36Sopenharmony_ci return; 135862306a36Sopenharmony_ci if (list_empty(&zhdr->buddy)) 135962306a36Sopenharmony_ci add_to_unbuddied(pool, zhdr); 136062306a36Sopenharmony_ci clear_bit(PAGE_CLAIMED, &page->private); 136162306a36Sopenharmony_ci z3fold_page_unlock(zhdr); 136262306a36Sopenharmony_ci} 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_cistatic const struct movable_operations z3fold_mops = { 136562306a36Sopenharmony_ci .isolate_page = z3fold_page_isolate, 136662306a36Sopenharmony_ci .migrate_page = z3fold_page_migrate, 136762306a36Sopenharmony_ci .putback_page = z3fold_page_putback, 136862306a36Sopenharmony_ci}; 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci/***************** 137162306a36Sopenharmony_ci * zpool 137262306a36Sopenharmony_ci ****************/ 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_cistatic void *z3fold_zpool_create(const char *name, gfp_t gfp) 137562306a36Sopenharmony_ci{ 137662306a36Sopenharmony_ci return z3fold_create_pool(name, gfp); 137762306a36Sopenharmony_ci} 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_cistatic void z3fold_zpool_destroy(void *pool) 138062306a36Sopenharmony_ci{ 138162306a36Sopenharmony_ci z3fold_destroy_pool(pool); 138262306a36Sopenharmony_ci} 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_cistatic int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, 138562306a36Sopenharmony_ci unsigned long *handle) 138662306a36Sopenharmony_ci{ 138762306a36Sopenharmony_ci return z3fold_alloc(pool, size, gfp, handle); 138862306a36Sopenharmony_ci} 138962306a36Sopenharmony_cistatic void z3fold_zpool_free(void *pool, unsigned long handle) 139062306a36Sopenharmony_ci{ 139162306a36Sopenharmony_ci z3fold_free(pool, handle); 139262306a36Sopenharmony_ci} 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_cistatic void *z3fold_zpool_map(void *pool, unsigned long handle, 139562306a36Sopenharmony_ci enum zpool_mapmode mm) 139662306a36Sopenharmony_ci{ 139762306a36Sopenharmony_ci return z3fold_map(pool, handle); 139862306a36Sopenharmony_ci} 139962306a36Sopenharmony_cistatic void z3fold_zpool_unmap(void *pool, unsigned long handle) 140062306a36Sopenharmony_ci{ 140162306a36Sopenharmony_ci z3fold_unmap(pool, handle); 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_cistatic u64 z3fold_zpool_total_size(void *pool) 140562306a36Sopenharmony_ci{ 140662306a36Sopenharmony_ci return z3fold_get_pool_size(pool) * PAGE_SIZE; 140762306a36Sopenharmony_ci} 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_cistatic struct zpool_driver z3fold_zpool_driver = { 141062306a36Sopenharmony_ci .type = "z3fold", 141162306a36Sopenharmony_ci .sleep_mapped = true, 141262306a36Sopenharmony_ci .owner = THIS_MODULE, 141362306a36Sopenharmony_ci .create = z3fold_zpool_create, 141462306a36Sopenharmony_ci .destroy = z3fold_zpool_destroy, 141562306a36Sopenharmony_ci .malloc = z3fold_zpool_malloc, 141662306a36Sopenharmony_ci .free = z3fold_zpool_free, 141762306a36Sopenharmony_ci .map = z3fold_zpool_map, 141862306a36Sopenharmony_ci .unmap = z3fold_zpool_unmap, 141962306a36Sopenharmony_ci .total_size = z3fold_zpool_total_size, 142062306a36Sopenharmony_ci}; 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_ciMODULE_ALIAS("zpool-z3fold"); 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_cistatic int __init init_z3fold(void) 142562306a36Sopenharmony_ci{ 142662306a36Sopenharmony_ci /* 142762306a36Sopenharmony_ci * Make sure the z3fold header is not larger than the page size and 142862306a36Sopenharmony_ci * there has remaining spaces for its buddy. 142962306a36Sopenharmony_ci */ 143062306a36Sopenharmony_ci BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); 143162306a36Sopenharmony_ci zpool_register_driver(&z3fold_zpool_driver); 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci return 0; 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_cistatic void __exit exit_z3fold(void) 143762306a36Sopenharmony_ci{ 143862306a36Sopenharmony_ci zpool_unregister_driver(&z3fold_zpool_driver); 143962306a36Sopenharmony_ci} 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_cimodule_init(init_z3fold); 144262306a36Sopenharmony_cimodule_exit(exit_z3fold); 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 144562306a36Sopenharmony_ciMODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>"); 144662306a36Sopenharmony_ciMODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages"); 1447