18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * zbud.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2013, Seth Jennings, IBM
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * zbud is an special purpose allocator for storing compressed pages.  Contrary
108c2ecf20Sopenharmony_ci * to what its name may suggest, zbud is not a buddy allocator, but rather an
118c2ecf20Sopenharmony_ci * allocator that "buddies" two compressed pages together in a single memory
128c2ecf20Sopenharmony_ci * page.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * While this design limits storage density, it has simple and deterministic
158c2ecf20Sopenharmony_ci * reclaim properties that make it preferable to a higher density approach when
168c2ecf20Sopenharmony_ci * reclaim will be used.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * zbud works by storing compressed pages, or "zpages", together in pairs in a
198c2ecf20Sopenharmony_ci * single memory page called a "zbud page".  The first buddy is "left
208c2ecf20Sopenharmony_ci * justified" at the beginning of the zbud page, and the last buddy is "right
218c2ecf20Sopenharmony_ci * justified" at the end of the zbud page.  The benefit is that if either
228c2ecf20Sopenharmony_ci * buddy is freed, the freed buddy space, coalesced with whatever slack space
238c2ecf20Sopenharmony_ci * that existed between the buddies, results in the largest possible free region
248c2ecf20Sopenharmony_ci * within the zbud page.
258c2ecf20Sopenharmony_ci *
268c2ecf20Sopenharmony_ci * zbud also provides an attractive lower bound on density. The ratio of zpages
278c2ecf20Sopenharmony_ci * to zbud pages can not be less than 1.  This ensures that zbud can never "do
288c2ecf20Sopenharmony_ci * harm" by using more pages to store zpages than the uncompressed zpages would
298c2ecf20Sopenharmony_ci * have used on their own.
308c2ecf20Sopenharmony_ci *
318c2ecf20Sopenharmony_ci * zbud pages are divided into "chunks".  The size of the chunks is fixed at
328c2ecf20Sopenharmony_ci * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages
338c2ecf20Sopenharmony_ci * into chunks allows organizing unbuddied zbud pages into a manageable number
348c2ecf20Sopenharmony_ci * of unbuddied lists according to the number of free chunks available in the
358c2ecf20Sopenharmony_ci * zbud page.
368c2ecf20Sopenharmony_ci *
378c2ecf20Sopenharmony_ci * The zbud API differs from that of conventional allocators in that the
388c2ecf20Sopenharmony_ci * allocation function, zbud_alloc(), returns an opaque handle to the user,
398c2ecf20Sopenharmony_ci * not a dereferenceable pointer.  The user must map the handle using
408c2ecf20Sopenharmony_ci * zbud_map() in order to get a usable pointer by which to access the
418c2ecf20Sopenharmony_ci * allocation data and unmap the handle with zbud_unmap() when operations
428c2ecf20Sopenharmony_ci * on the allocation data are complete.
438c2ecf20Sopenharmony_ci */
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci#include <linux/atomic.h>
488c2ecf20Sopenharmony_ci#include <linux/list.h>
498c2ecf20Sopenharmony_ci#include <linux/mm.h>
508c2ecf20Sopenharmony_ci#include <linux/module.h>
518c2ecf20Sopenharmony_ci#include <linux/preempt.h>
528c2ecf20Sopenharmony_ci#include <linux/slab.h>
538c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
548c2ecf20Sopenharmony_ci#include <linux/zbud.h>
558c2ecf20Sopenharmony_ci#include <linux/zpool.h>
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci/*****************
588c2ecf20Sopenharmony_ci * Structures
598c2ecf20Sopenharmony_ci*****************/
608c2ecf20Sopenharmony_ci/*
618c2ecf20Sopenharmony_ci * NCHUNKS_ORDER determines the internal allocation granularity, effectively
628c2ecf20Sopenharmony_ci * adjusting internal fragmentation.  It also determines the number of
638c2ecf20Sopenharmony_ci * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
648c2ecf20Sopenharmony_ci * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
658c2ecf20Sopenharmony_ci * in allocated page is occupied by zbud header, NCHUNKS will be calculated to
668c2ecf20Sopenharmony_ci * 63 which shows the max number of free chunks in zbud page, also there will be
678c2ecf20Sopenharmony_ci * 63 freelists per pool.
688c2ecf20Sopenharmony_ci */
698c2ecf20Sopenharmony_ci#define NCHUNKS_ORDER	6
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
728c2ecf20Sopenharmony_ci#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
738c2ecf20Sopenharmony_ci#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
748c2ecf20Sopenharmony_ci#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci/**
778c2ecf20Sopenharmony_ci * struct zbud_pool - stores metadata for each zbud pool
788c2ecf20Sopenharmony_ci * @lock:	protects all pool fields and first|last_chunk fields of any
798c2ecf20Sopenharmony_ci *		zbud page in the pool
808c2ecf20Sopenharmony_ci * @unbuddied:	array of lists tracking zbud pages that only contain one buddy;
818c2ecf20Sopenharmony_ci *		the lists each zbud page is added to depends on the size of
828c2ecf20Sopenharmony_ci *		its free region.
838c2ecf20Sopenharmony_ci * @buddied:	list tracking the zbud pages that contain two buddies;
848c2ecf20Sopenharmony_ci *		these zbud pages are full
858c2ecf20Sopenharmony_ci * @lru:	list tracking the zbud pages in LRU order by most recently
868c2ecf20Sopenharmony_ci *		added buddy.
878c2ecf20Sopenharmony_ci * @pages_nr:	number of zbud pages in the pool.
888c2ecf20Sopenharmony_ci * @ops:	pointer to a structure of user defined operations specified at
898c2ecf20Sopenharmony_ci *		pool creation time.
908c2ecf20Sopenharmony_ci *
918c2ecf20Sopenharmony_ci * This structure is allocated at pool creation time and maintains metadata
928c2ecf20Sopenharmony_ci * pertaining to a particular zbud pool.
938c2ecf20Sopenharmony_ci */
948c2ecf20Sopenharmony_cistruct zbud_pool {
958c2ecf20Sopenharmony_ci	spinlock_t lock;
968c2ecf20Sopenharmony_ci	struct list_head unbuddied[NCHUNKS];
978c2ecf20Sopenharmony_ci	struct list_head buddied;
988c2ecf20Sopenharmony_ci	struct list_head lru;
998c2ecf20Sopenharmony_ci	u64 pages_nr;
1008c2ecf20Sopenharmony_ci	const struct zbud_ops *ops;
1018c2ecf20Sopenharmony_ci#ifdef CONFIG_ZPOOL
1028c2ecf20Sopenharmony_ci	struct zpool *zpool;
1038c2ecf20Sopenharmony_ci	const struct zpool_ops *zpool_ops;
1048c2ecf20Sopenharmony_ci#endif
1058c2ecf20Sopenharmony_ci};
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci/*
1088c2ecf20Sopenharmony_ci * struct zbud_header - zbud page metadata occupying the first chunk of each
1098c2ecf20Sopenharmony_ci *			zbud page.
1108c2ecf20Sopenharmony_ci * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
1118c2ecf20Sopenharmony_ci * @lru:	links the zbud page into the lru list in the pool
1128c2ecf20Sopenharmony_ci * @first_chunks:	the size of the first buddy in chunks, 0 if free
1138c2ecf20Sopenharmony_ci * @last_chunks:	the size of the last buddy in chunks, 0 if free
1148c2ecf20Sopenharmony_ci */
1158c2ecf20Sopenharmony_cistruct zbud_header {
1168c2ecf20Sopenharmony_ci	struct list_head buddy;
1178c2ecf20Sopenharmony_ci	struct list_head lru;
1188c2ecf20Sopenharmony_ci	unsigned int first_chunks;
1198c2ecf20Sopenharmony_ci	unsigned int last_chunks;
1208c2ecf20Sopenharmony_ci	bool under_reclaim;
1218c2ecf20Sopenharmony_ci};
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci/*****************
1248c2ecf20Sopenharmony_ci * zpool
1258c2ecf20Sopenharmony_ci ****************/
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci#ifdef CONFIG_ZPOOL
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_cistatic int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
1308c2ecf20Sopenharmony_ci{
1318c2ecf20Sopenharmony_ci	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
1328c2ecf20Sopenharmony_ci		return pool->zpool_ops->evict(pool->zpool, handle);
1338c2ecf20Sopenharmony_ci	else
1348c2ecf20Sopenharmony_ci		return -ENOENT;
1358c2ecf20Sopenharmony_ci}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic const struct zbud_ops zbud_zpool_ops = {
1388c2ecf20Sopenharmony_ci	.evict =	zbud_zpool_evict
1398c2ecf20Sopenharmony_ci};
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_cistatic void *zbud_zpool_create(const char *name, gfp_t gfp,
1428c2ecf20Sopenharmony_ci			       const struct zpool_ops *zpool_ops,
1438c2ecf20Sopenharmony_ci			       struct zpool *zpool)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	struct zbud_pool *pool;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	pool = zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL);
1488c2ecf20Sopenharmony_ci	if (pool) {
1498c2ecf20Sopenharmony_ci		pool->zpool = zpool;
1508c2ecf20Sopenharmony_ci		pool->zpool_ops = zpool_ops;
1518c2ecf20Sopenharmony_ci	}
1528c2ecf20Sopenharmony_ci	return pool;
1538c2ecf20Sopenharmony_ci}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_cistatic void zbud_zpool_destroy(void *pool)
1568c2ecf20Sopenharmony_ci{
1578c2ecf20Sopenharmony_ci	zbud_destroy_pool(pool);
1588c2ecf20Sopenharmony_ci}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_cistatic int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
1618c2ecf20Sopenharmony_ci			unsigned long *handle)
1628c2ecf20Sopenharmony_ci{
1638c2ecf20Sopenharmony_ci	return zbud_alloc(pool, size, gfp, handle);
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_cistatic void zbud_zpool_free(void *pool, unsigned long handle)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	zbud_free(pool, handle);
1688c2ecf20Sopenharmony_ci}
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_cistatic int zbud_zpool_shrink(void *pool, unsigned int pages,
1718c2ecf20Sopenharmony_ci			unsigned int *reclaimed)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	unsigned int total = 0;
1748c2ecf20Sopenharmony_ci	int ret = -EINVAL;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	while (total < pages) {
1778c2ecf20Sopenharmony_ci		ret = zbud_reclaim_page(pool, 8);
1788c2ecf20Sopenharmony_ci		if (ret < 0)
1798c2ecf20Sopenharmony_ci			break;
1808c2ecf20Sopenharmony_ci		total++;
1818c2ecf20Sopenharmony_ci	}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	if (reclaimed)
1848c2ecf20Sopenharmony_ci		*reclaimed = total;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	return ret;
1878c2ecf20Sopenharmony_ci}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic void *zbud_zpool_map(void *pool, unsigned long handle,
1908c2ecf20Sopenharmony_ci			enum zpool_mapmode mm)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	return zbud_map(pool, handle);
1938c2ecf20Sopenharmony_ci}
1948c2ecf20Sopenharmony_cistatic void zbud_zpool_unmap(void *pool, unsigned long handle)
1958c2ecf20Sopenharmony_ci{
1968c2ecf20Sopenharmony_ci	zbud_unmap(pool, handle);
1978c2ecf20Sopenharmony_ci}
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_cistatic u64 zbud_zpool_total_size(void *pool)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	return zbud_get_pool_size(pool) * PAGE_SIZE;
2028c2ecf20Sopenharmony_ci}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_cistatic struct zpool_driver zbud_zpool_driver = {
2058c2ecf20Sopenharmony_ci	.type =		"zbud",
2068c2ecf20Sopenharmony_ci	.sleep_mapped = true,
2078c2ecf20Sopenharmony_ci	.owner =	THIS_MODULE,
2088c2ecf20Sopenharmony_ci	.create =	zbud_zpool_create,
2098c2ecf20Sopenharmony_ci	.destroy =	zbud_zpool_destroy,
2108c2ecf20Sopenharmony_ci	.malloc =	zbud_zpool_malloc,
2118c2ecf20Sopenharmony_ci	.free =		zbud_zpool_free,
2128c2ecf20Sopenharmony_ci	.shrink =	zbud_zpool_shrink,
2138c2ecf20Sopenharmony_ci	.map =		zbud_zpool_map,
2148c2ecf20Sopenharmony_ci	.unmap =	zbud_zpool_unmap,
2158c2ecf20Sopenharmony_ci	.total_size =	zbud_zpool_total_size,
2168c2ecf20Sopenharmony_ci};
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ciMODULE_ALIAS("zpool-zbud");
2198c2ecf20Sopenharmony_ci#endif /* CONFIG_ZPOOL */
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci/*****************
2228c2ecf20Sopenharmony_ci * Helpers
2238c2ecf20Sopenharmony_ci*****************/
2248c2ecf20Sopenharmony_ci/* Just to make the code easier to read */
2258c2ecf20Sopenharmony_cienum buddy {
2268c2ecf20Sopenharmony_ci	FIRST,
2278c2ecf20Sopenharmony_ci	LAST
2288c2ecf20Sopenharmony_ci};
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci/* Converts an allocation size in bytes to size in zbud chunks */
2318c2ecf20Sopenharmony_cistatic int size_to_chunks(size_t size)
2328c2ecf20Sopenharmony_ci{
2338c2ecf20Sopenharmony_ci	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
2348c2ecf20Sopenharmony_ci}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci#define for_each_unbuddied_list(_iter, _begin) \
2378c2ecf20Sopenharmony_ci	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci/* Initializes the zbud header of a newly allocated zbud page */
2408c2ecf20Sopenharmony_cistatic struct zbud_header *init_zbud_page(struct page *page)
2418c2ecf20Sopenharmony_ci{
2428c2ecf20Sopenharmony_ci	struct zbud_header *zhdr = page_address(page);
2438c2ecf20Sopenharmony_ci	zhdr->first_chunks = 0;
2448c2ecf20Sopenharmony_ci	zhdr->last_chunks = 0;
2458c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&zhdr->buddy);
2468c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&zhdr->lru);
2478c2ecf20Sopenharmony_ci	zhdr->under_reclaim = false;
2488c2ecf20Sopenharmony_ci	return zhdr;
2498c2ecf20Sopenharmony_ci}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci/* Resets the struct page fields and frees the page */
2528c2ecf20Sopenharmony_cistatic void free_zbud_page(struct zbud_header *zhdr)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	__free_page(virt_to_page(zhdr));
2558c2ecf20Sopenharmony_ci}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci/*
2588c2ecf20Sopenharmony_ci * Encodes the handle of a particular buddy within a zbud page
2598c2ecf20Sopenharmony_ci * Pool lock should be held as this function accesses first|last_chunks
2608c2ecf20Sopenharmony_ci */
2618c2ecf20Sopenharmony_cistatic unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	unsigned long handle;
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	/*
2668c2ecf20Sopenharmony_ci	 * For now, the encoded handle is actually just the pointer to the data
2678c2ecf20Sopenharmony_ci	 * but this might not always be the case.  A little information hiding.
2688c2ecf20Sopenharmony_ci	 * Add CHUNK_SIZE to the handle if it is the first allocation to jump
2698c2ecf20Sopenharmony_ci	 * over the zbud header in the first chunk.
2708c2ecf20Sopenharmony_ci	 */
2718c2ecf20Sopenharmony_ci	handle = (unsigned long)zhdr;
2728c2ecf20Sopenharmony_ci	if (bud == FIRST)
2738c2ecf20Sopenharmony_ci		/* skip over zbud header */
2748c2ecf20Sopenharmony_ci		handle += ZHDR_SIZE_ALIGNED;
2758c2ecf20Sopenharmony_ci	else /* bud == LAST */
2768c2ecf20Sopenharmony_ci		handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT);
2778c2ecf20Sopenharmony_ci	return handle;
2788c2ecf20Sopenharmony_ci}
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci/* Returns the zbud page where a given handle is stored */
2818c2ecf20Sopenharmony_cistatic struct zbud_header *handle_to_zbud_header(unsigned long handle)
2828c2ecf20Sopenharmony_ci{
2838c2ecf20Sopenharmony_ci	return (struct zbud_header *)(handle & PAGE_MASK);
2848c2ecf20Sopenharmony_ci}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci/* Returns the number of free chunks in a zbud page */
2878c2ecf20Sopenharmony_cistatic int num_free_chunks(struct zbud_header *zhdr)
2888c2ecf20Sopenharmony_ci{
2898c2ecf20Sopenharmony_ci	/*
2908c2ecf20Sopenharmony_ci	 * Rather than branch for different situations, just use the fact that
2918c2ecf20Sopenharmony_ci	 * free buddies have a length of zero to simplify everything.
2928c2ecf20Sopenharmony_ci	 */
2938c2ecf20Sopenharmony_ci	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
2948c2ecf20Sopenharmony_ci}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci/*****************
2978c2ecf20Sopenharmony_ci * API Functions
2988c2ecf20Sopenharmony_ci*****************/
2998c2ecf20Sopenharmony_ci/**
3008c2ecf20Sopenharmony_ci * zbud_create_pool() - create a new zbud pool
3018c2ecf20Sopenharmony_ci * @gfp:	gfp flags when allocating the zbud pool structure
3028c2ecf20Sopenharmony_ci * @ops:	user-defined operations for the zbud pool
3038c2ecf20Sopenharmony_ci *
3048c2ecf20Sopenharmony_ci * Return: pointer to the new zbud pool or NULL if the metadata allocation
3058c2ecf20Sopenharmony_ci * failed.
3068c2ecf20Sopenharmony_ci */
3078c2ecf20Sopenharmony_cistruct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops)
3088c2ecf20Sopenharmony_ci{
3098c2ecf20Sopenharmony_ci	struct zbud_pool *pool;
3108c2ecf20Sopenharmony_ci	int i;
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	pool = kzalloc(sizeof(struct zbud_pool), gfp);
3138c2ecf20Sopenharmony_ci	if (!pool)
3148c2ecf20Sopenharmony_ci		return NULL;
3158c2ecf20Sopenharmony_ci	spin_lock_init(&pool->lock);
3168c2ecf20Sopenharmony_ci	for_each_unbuddied_list(i, 0)
3178c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&pool->unbuddied[i]);
3188c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&pool->buddied);
3198c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&pool->lru);
3208c2ecf20Sopenharmony_ci	pool->pages_nr = 0;
3218c2ecf20Sopenharmony_ci	pool->ops = ops;
3228c2ecf20Sopenharmony_ci	return pool;
3238c2ecf20Sopenharmony_ci}
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci/**
3268c2ecf20Sopenharmony_ci * zbud_destroy_pool() - destroys an existing zbud pool
3278c2ecf20Sopenharmony_ci * @pool:	the zbud pool to be destroyed
3288c2ecf20Sopenharmony_ci *
3298c2ecf20Sopenharmony_ci * The pool should be emptied before this function is called.
3308c2ecf20Sopenharmony_ci */
3318c2ecf20Sopenharmony_civoid zbud_destroy_pool(struct zbud_pool *pool)
3328c2ecf20Sopenharmony_ci{
3338c2ecf20Sopenharmony_ci	kfree(pool);
3348c2ecf20Sopenharmony_ci}
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci/**
3378c2ecf20Sopenharmony_ci * zbud_alloc() - allocates a region of a given size
3388c2ecf20Sopenharmony_ci * @pool:	zbud pool from which to allocate
3398c2ecf20Sopenharmony_ci * @size:	size in bytes of the desired allocation
3408c2ecf20Sopenharmony_ci * @gfp:	gfp flags used if the pool needs to grow
3418c2ecf20Sopenharmony_ci * @handle:	handle of the new allocation
3428c2ecf20Sopenharmony_ci *
3438c2ecf20Sopenharmony_ci * This function will attempt to find a free region in the pool large enough to
3448c2ecf20Sopenharmony_ci * satisfy the allocation request.  A search of the unbuddied lists is
3458c2ecf20Sopenharmony_ci * performed first. If no suitable free region is found, then a new page is
3468c2ecf20Sopenharmony_ci * allocated and added to the pool to satisfy the request.
3478c2ecf20Sopenharmony_ci *
3488c2ecf20Sopenharmony_ci * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
3498c2ecf20Sopenharmony_ci * as zbud pool pages.
3508c2ecf20Sopenharmony_ci *
3518c2ecf20Sopenharmony_ci * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
3528c2ecf20Sopenharmony_ci * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
3538c2ecf20Sopenharmony_ci * a new page.
3548c2ecf20Sopenharmony_ci */
3558c2ecf20Sopenharmony_ciint zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
3568c2ecf20Sopenharmony_ci			unsigned long *handle)
3578c2ecf20Sopenharmony_ci{
3588c2ecf20Sopenharmony_ci	int chunks, i, freechunks;
3598c2ecf20Sopenharmony_ci	struct zbud_header *zhdr = NULL;
3608c2ecf20Sopenharmony_ci	enum buddy bud;
3618c2ecf20Sopenharmony_ci	struct page *page;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	if (!size || (gfp & __GFP_HIGHMEM))
3648c2ecf20Sopenharmony_ci		return -EINVAL;
3658c2ecf20Sopenharmony_ci	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
3668c2ecf20Sopenharmony_ci		return -ENOSPC;
3678c2ecf20Sopenharmony_ci	chunks = size_to_chunks(size);
3688c2ecf20Sopenharmony_ci	spin_lock(&pool->lock);
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	/* First, try to find an unbuddied zbud page. */
3718c2ecf20Sopenharmony_ci	for_each_unbuddied_list(i, chunks) {
3728c2ecf20Sopenharmony_ci		if (!list_empty(&pool->unbuddied[i])) {
3738c2ecf20Sopenharmony_ci			zhdr = list_first_entry(&pool->unbuddied[i],
3748c2ecf20Sopenharmony_ci					struct zbud_header, buddy);
3758c2ecf20Sopenharmony_ci			list_del(&zhdr->buddy);
3768c2ecf20Sopenharmony_ci			if (zhdr->first_chunks == 0)
3778c2ecf20Sopenharmony_ci				bud = FIRST;
3788c2ecf20Sopenharmony_ci			else
3798c2ecf20Sopenharmony_ci				bud = LAST;
3808c2ecf20Sopenharmony_ci			goto found;
3818c2ecf20Sopenharmony_ci		}
3828c2ecf20Sopenharmony_ci	}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	/* Couldn't find unbuddied zbud page, create new one */
3858c2ecf20Sopenharmony_ci	spin_unlock(&pool->lock);
3868c2ecf20Sopenharmony_ci	page = alloc_page(gfp);
3878c2ecf20Sopenharmony_ci	if (!page)
3888c2ecf20Sopenharmony_ci		return -ENOMEM;
3898c2ecf20Sopenharmony_ci	spin_lock(&pool->lock);
3908c2ecf20Sopenharmony_ci	pool->pages_nr++;
3918c2ecf20Sopenharmony_ci	zhdr = init_zbud_page(page);
3928c2ecf20Sopenharmony_ci	bud = FIRST;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_cifound:
3958c2ecf20Sopenharmony_ci	if (bud == FIRST)
3968c2ecf20Sopenharmony_ci		zhdr->first_chunks = chunks;
3978c2ecf20Sopenharmony_ci	else
3988c2ecf20Sopenharmony_ci		zhdr->last_chunks = chunks;
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
4018c2ecf20Sopenharmony_ci		/* Add to unbuddied list */
4028c2ecf20Sopenharmony_ci		freechunks = num_free_chunks(zhdr);
4038c2ecf20Sopenharmony_ci		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
4048c2ecf20Sopenharmony_ci	} else {
4058c2ecf20Sopenharmony_ci		/* Add to buddied list */
4068c2ecf20Sopenharmony_ci		list_add(&zhdr->buddy, &pool->buddied);
4078c2ecf20Sopenharmony_ci	}
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	/* Add/move zbud page to beginning of LRU */
4108c2ecf20Sopenharmony_ci	if (!list_empty(&zhdr->lru))
4118c2ecf20Sopenharmony_ci		list_del(&zhdr->lru);
4128c2ecf20Sopenharmony_ci	list_add(&zhdr->lru, &pool->lru);
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	*handle = encode_handle(zhdr, bud);
4158c2ecf20Sopenharmony_ci	spin_unlock(&pool->lock);
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	return 0;
4188c2ecf20Sopenharmony_ci}
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci/**
4218c2ecf20Sopenharmony_ci * zbud_free() - frees the allocation associated with the given handle
4228c2ecf20Sopenharmony_ci * @pool:	pool in which the allocation resided
4238c2ecf20Sopenharmony_ci * @handle:	handle associated with the allocation returned by zbud_alloc()
4248c2ecf20Sopenharmony_ci *
4258c2ecf20Sopenharmony_ci * In the case that the zbud page in which the allocation resides is under
4268c2ecf20Sopenharmony_ci * reclaim, as indicated by the PG_reclaim flag being set, this function
4278c2ecf20Sopenharmony_ci * only sets the first|last_chunks to 0.  The page is actually freed
4288c2ecf20Sopenharmony_ci * once both buddies are evicted (see zbud_reclaim_page() below).
4298c2ecf20Sopenharmony_ci */
4308c2ecf20Sopenharmony_civoid zbud_free(struct zbud_pool *pool, unsigned long handle)
4318c2ecf20Sopenharmony_ci{
4328c2ecf20Sopenharmony_ci	struct zbud_header *zhdr;
4338c2ecf20Sopenharmony_ci	int freechunks;
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci	spin_lock(&pool->lock);
4368c2ecf20Sopenharmony_ci	zhdr = handle_to_zbud_header(handle);
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	/* If first buddy, handle will be page aligned */
4398c2ecf20Sopenharmony_ci	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
4408c2ecf20Sopenharmony_ci		zhdr->last_chunks = 0;
4418c2ecf20Sopenharmony_ci	else
4428c2ecf20Sopenharmony_ci		zhdr->first_chunks = 0;
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	if (zhdr->under_reclaim) {
4458c2ecf20Sopenharmony_ci		/* zbud page is under reclaim, reclaim will free */
4468c2ecf20Sopenharmony_ci		spin_unlock(&pool->lock);
4478c2ecf20Sopenharmony_ci		return;
4488c2ecf20Sopenharmony_ci	}
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci	/* Remove from existing buddy list */
4518c2ecf20Sopenharmony_ci	list_del(&zhdr->buddy);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
4548c2ecf20Sopenharmony_ci		/* zbud page is empty, free */
4558c2ecf20Sopenharmony_ci		list_del(&zhdr->lru);
4568c2ecf20Sopenharmony_ci		free_zbud_page(zhdr);
4578c2ecf20Sopenharmony_ci		pool->pages_nr--;
4588c2ecf20Sopenharmony_ci	} else {
4598c2ecf20Sopenharmony_ci		/* Add to unbuddied list */
4608c2ecf20Sopenharmony_ci		freechunks = num_free_chunks(zhdr);
4618c2ecf20Sopenharmony_ci		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
4628c2ecf20Sopenharmony_ci	}
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci	spin_unlock(&pool->lock);
4658c2ecf20Sopenharmony_ci}
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci/**
4688c2ecf20Sopenharmony_ci * zbud_reclaim_page() - evicts allocations from a pool page and frees it
4698c2ecf20Sopenharmony_ci * @pool:	pool from which a page will attempt to be evicted
4708c2ecf20Sopenharmony_ci * @retries:	number of pages on the LRU list for which eviction will
4718c2ecf20Sopenharmony_ci *		be attempted before failing
4728c2ecf20Sopenharmony_ci *
4738c2ecf20Sopenharmony_ci * zbud reclaim is different from normal system reclaim in that the reclaim is
4748c2ecf20Sopenharmony_ci * done from the bottom, up.  This is because only the bottom layer, zbud, has
4758c2ecf20Sopenharmony_ci * information on how the allocations are organized within each zbud page. This
4768c2ecf20Sopenharmony_ci * has the potential to create interesting locking situations between zbud and
4778c2ecf20Sopenharmony_ci * the user, however.
4788c2ecf20Sopenharmony_ci *
4798c2ecf20Sopenharmony_ci * To avoid these, this is how zbud_reclaim_page() should be called:
4808c2ecf20Sopenharmony_ci *
4818c2ecf20Sopenharmony_ci * The user detects a page should be reclaimed and calls zbud_reclaim_page().
4828c2ecf20Sopenharmony_ci * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
4838c2ecf20Sopenharmony_ci * the user-defined eviction handler with the pool and handle as arguments.
4848c2ecf20Sopenharmony_ci *
4858c2ecf20Sopenharmony_ci * If the handle can not be evicted, the eviction handler should return
4868c2ecf20Sopenharmony_ci * non-zero. zbud_reclaim_page() will add the zbud page back to the
4878c2ecf20Sopenharmony_ci * appropriate list and try the next zbud page on the LRU up to
4888c2ecf20Sopenharmony_ci * a user defined number of retries.
4898c2ecf20Sopenharmony_ci *
4908c2ecf20Sopenharmony_ci * If the handle is successfully evicted, the eviction handler should
4918c2ecf20Sopenharmony_ci * return 0 _and_ should have called zbud_free() on the handle. zbud_free()
4928c2ecf20Sopenharmony_ci * contains logic to delay freeing the page if the page is under reclaim,
4938c2ecf20Sopenharmony_ci * as indicated by the setting of the PG_reclaim flag on the underlying page.
4948c2ecf20Sopenharmony_ci *
4958c2ecf20Sopenharmony_ci * If all buddies in the zbud page are successfully evicted, then the
4968c2ecf20Sopenharmony_ci * zbud page can be freed.
4978c2ecf20Sopenharmony_ci *
4988c2ecf20Sopenharmony_ci * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
4998c2ecf20Sopenharmony_ci * no pages to evict or an eviction handler is not registered, -EAGAIN if
5008c2ecf20Sopenharmony_ci * the retry limit was hit.
5018c2ecf20Sopenharmony_ci */
5028c2ecf20Sopenharmony_ciint zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
5038c2ecf20Sopenharmony_ci{
5048c2ecf20Sopenharmony_ci	int i, ret, freechunks;
5058c2ecf20Sopenharmony_ci	struct zbud_header *zhdr;
5068c2ecf20Sopenharmony_ci	unsigned long first_handle = 0, last_handle = 0;
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	spin_lock(&pool->lock);
5098c2ecf20Sopenharmony_ci	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
5108c2ecf20Sopenharmony_ci			retries == 0) {
5118c2ecf20Sopenharmony_ci		spin_unlock(&pool->lock);
5128c2ecf20Sopenharmony_ci		return -EINVAL;
5138c2ecf20Sopenharmony_ci	}
5148c2ecf20Sopenharmony_ci	for (i = 0; i < retries; i++) {
5158c2ecf20Sopenharmony_ci		zhdr = list_last_entry(&pool->lru, struct zbud_header, lru);
5168c2ecf20Sopenharmony_ci		list_del(&zhdr->lru);
5178c2ecf20Sopenharmony_ci		list_del(&zhdr->buddy);
5188c2ecf20Sopenharmony_ci		/* Protect zbud page against free */
5198c2ecf20Sopenharmony_ci		zhdr->under_reclaim = true;
5208c2ecf20Sopenharmony_ci		/*
5218c2ecf20Sopenharmony_ci		 * We need encode the handles before unlocking, since we can
5228c2ecf20Sopenharmony_ci		 * race with free that will set (first|last)_chunks to 0
5238c2ecf20Sopenharmony_ci		 */
5248c2ecf20Sopenharmony_ci		first_handle = 0;
5258c2ecf20Sopenharmony_ci		last_handle = 0;
5268c2ecf20Sopenharmony_ci		if (zhdr->first_chunks)
5278c2ecf20Sopenharmony_ci			first_handle = encode_handle(zhdr, FIRST);
5288c2ecf20Sopenharmony_ci		if (zhdr->last_chunks)
5298c2ecf20Sopenharmony_ci			last_handle = encode_handle(zhdr, LAST);
5308c2ecf20Sopenharmony_ci		spin_unlock(&pool->lock);
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci		/* Issue the eviction callback(s) */
5338c2ecf20Sopenharmony_ci		if (first_handle) {
5348c2ecf20Sopenharmony_ci			ret = pool->ops->evict(pool, first_handle);
5358c2ecf20Sopenharmony_ci			if (ret)
5368c2ecf20Sopenharmony_ci				goto next;
5378c2ecf20Sopenharmony_ci		}
5388c2ecf20Sopenharmony_ci		if (last_handle) {
5398c2ecf20Sopenharmony_ci			ret = pool->ops->evict(pool, last_handle);
5408c2ecf20Sopenharmony_ci			if (ret)
5418c2ecf20Sopenharmony_ci				goto next;
5428c2ecf20Sopenharmony_ci		}
5438c2ecf20Sopenharmony_cinext:
5448c2ecf20Sopenharmony_ci		spin_lock(&pool->lock);
5458c2ecf20Sopenharmony_ci		zhdr->under_reclaim = false;
5468c2ecf20Sopenharmony_ci		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
5478c2ecf20Sopenharmony_ci			/*
5488c2ecf20Sopenharmony_ci			 * Both buddies are now free, free the zbud page and
5498c2ecf20Sopenharmony_ci			 * return success.
5508c2ecf20Sopenharmony_ci			 */
5518c2ecf20Sopenharmony_ci			free_zbud_page(zhdr);
5528c2ecf20Sopenharmony_ci			pool->pages_nr--;
5538c2ecf20Sopenharmony_ci			spin_unlock(&pool->lock);
5548c2ecf20Sopenharmony_ci			return 0;
5558c2ecf20Sopenharmony_ci		} else if (zhdr->first_chunks == 0 ||
5568c2ecf20Sopenharmony_ci				zhdr->last_chunks == 0) {
5578c2ecf20Sopenharmony_ci			/* add to unbuddied list */
5588c2ecf20Sopenharmony_ci			freechunks = num_free_chunks(zhdr);
5598c2ecf20Sopenharmony_ci			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
5608c2ecf20Sopenharmony_ci		} else {
5618c2ecf20Sopenharmony_ci			/* add to buddied list */
5628c2ecf20Sopenharmony_ci			list_add(&zhdr->buddy, &pool->buddied);
5638c2ecf20Sopenharmony_ci		}
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci		/* add to beginning of LRU */
5668c2ecf20Sopenharmony_ci		list_add(&zhdr->lru, &pool->lru);
5678c2ecf20Sopenharmony_ci	}
5688c2ecf20Sopenharmony_ci	spin_unlock(&pool->lock);
5698c2ecf20Sopenharmony_ci	return -EAGAIN;
5708c2ecf20Sopenharmony_ci}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci/**
5738c2ecf20Sopenharmony_ci * zbud_map() - maps the allocation associated with the given handle
5748c2ecf20Sopenharmony_ci * @pool:	pool in which the allocation resides
5758c2ecf20Sopenharmony_ci * @handle:	handle associated with the allocation to be mapped
5768c2ecf20Sopenharmony_ci *
5778c2ecf20Sopenharmony_ci * While trivial for zbud, the mapping functions for others allocators
5788c2ecf20Sopenharmony_ci * implementing this allocation API could have more complex information encoded
5798c2ecf20Sopenharmony_ci * in the handle and could create temporary mappings to make the data
5808c2ecf20Sopenharmony_ci * accessible to the user.
5818c2ecf20Sopenharmony_ci *
5828c2ecf20Sopenharmony_ci * Returns: a pointer to the mapped allocation
5838c2ecf20Sopenharmony_ci */
5848c2ecf20Sopenharmony_civoid *zbud_map(struct zbud_pool *pool, unsigned long handle)
5858c2ecf20Sopenharmony_ci{
5868c2ecf20Sopenharmony_ci	return (void *)(handle);
5878c2ecf20Sopenharmony_ci}
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci/**
5908c2ecf20Sopenharmony_ci * zbud_unmap() - maps the allocation associated with the given handle
5918c2ecf20Sopenharmony_ci * @pool:	pool in which the allocation resides
5928c2ecf20Sopenharmony_ci * @handle:	handle associated with the allocation to be unmapped
5938c2ecf20Sopenharmony_ci */
5948c2ecf20Sopenharmony_civoid zbud_unmap(struct zbud_pool *pool, unsigned long handle)
5958c2ecf20Sopenharmony_ci{
5968c2ecf20Sopenharmony_ci}
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci/**
5998c2ecf20Sopenharmony_ci * zbud_get_pool_size() - gets the zbud pool size in pages
6008c2ecf20Sopenharmony_ci * @pool:	pool whose size is being queried
6018c2ecf20Sopenharmony_ci *
6028c2ecf20Sopenharmony_ci * Returns: size in pages of the given pool.  The pool lock need not be
6038c2ecf20Sopenharmony_ci * taken to access pages_nr.
6048c2ecf20Sopenharmony_ci */
6058c2ecf20Sopenharmony_ciu64 zbud_get_pool_size(struct zbud_pool *pool)
6068c2ecf20Sopenharmony_ci{
6078c2ecf20Sopenharmony_ci	return pool->pages_nr;
6088c2ecf20Sopenharmony_ci}
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_cistatic int __init init_zbud(void)
6118c2ecf20Sopenharmony_ci{
6128c2ecf20Sopenharmony_ci	/* Make sure the zbud header will fit in one chunk */
6138c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
6148c2ecf20Sopenharmony_ci	pr_info("loaded\n");
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci#ifdef CONFIG_ZPOOL
6178c2ecf20Sopenharmony_ci	zpool_register_driver(&zbud_zpool_driver);
6188c2ecf20Sopenharmony_ci#endif
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	return 0;
6218c2ecf20Sopenharmony_ci}
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_cistatic void __exit exit_zbud(void)
6248c2ecf20Sopenharmony_ci{
6258c2ecf20Sopenharmony_ci#ifdef CONFIG_ZPOOL
6268c2ecf20Sopenharmony_ci	zpool_unregister_driver(&zbud_zpool_driver);
6278c2ecf20Sopenharmony_ci#endif
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	pr_info("unloaded\n");
6308c2ecf20Sopenharmony_ci}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_cimodule_init(init_zbud);
6338c2ecf20Sopenharmony_cimodule_exit(exit_zbud);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
6368c2ecf20Sopenharmony_ciMODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
6378c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");
638