/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/page_pool.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */

#include <trace/events/page_pool.h>

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
238c2ecf20Sopenharmony_ci
/* Validate user-supplied parameters and set up the pool's internal
 * state (ptr_ring, release counter, user refcount).
 * Returns 0 on success, or a negative errno on bad params / OOM.
 */
static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	/* Keep a private copy; caller may reuse/free @params afterwards */
	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
	 * which is the XDP_TX use-case.
	 */
	if (pool->p.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;
	}

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		/* A zero max_len would make every device-sync a no-op */
		if (!pool->p.max_len)
			return -EINVAL;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
		return -ENOMEM;

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* Driver calling page_pool_create() also call page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	/* Pin the device while DMA mappings may reference it */
	if (pool->p.flags & PP_FLAG_DMA_MAP)
		get_device(pool->p.dev);

	return 0;
}
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_cistruct page_pool *page_pool_create(const struct page_pool_params *params)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	struct page_pool *pool;
848c2ecf20Sopenharmony_ci	int err;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
878c2ecf20Sopenharmony_ci	if (!pool)
888c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	err = page_pool_init(pool, params);
918c2ecf20Sopenharmony_ci	if (err < 0) {
928c2ecf20Sopenharmony_ci		pr_warn("%s() gave up with errno %d\n", __func__, err);
938c2ecf20Sopenharmony_ci		kfree(pool);
948c2ecf20Sopenharmony_ci		return ERR_PTR(err);
958c2ecf20Sopenharmony_ci	}
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	return pool;
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ciEXPORT_SYMBOL(page_pool_create);
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_cistatic void page_pool_return_page(struct page_pool *pool, struct page *page);
1028c2ecf20Sopenharmony_ci
/* Refill the alloc-side cache from the shared ptr_ring and hand back
 * one page, or NULL when the ring is empty.  Only pages on the
 * preferred NUMA node are cached; on a mismatch the page is released
 * and refilling stops, falling through to a fresh allocation.
 */
noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r))
		return NULL;

	/* Softirq guarantee CPU and thus NUMA node is stable. This,
	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Slower-path: Get pages from locked ring queue */
	spin_lock(&r->consumer_lock);

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch;
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fallthrough to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0))
		page = pool->alloc.cache[--pool->alloc.count];

	spin_unlock(&r->consumer_lock);
	return page;
}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci/* fast path */
1568c2ecf20Sopenharmony_cistatic struct page *__page_pool_get_cached(struct page_pool *pool)
1578c2ecf20Sopenharmony_ci{
1588c2ecf20Sopenharmony_ci	struct page *page;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
1618c2ecf20Sopenharmony_ci	if (likely(pool->alloc.count)) {
1628c2ecf20Sopenharmony_ci		/* Fast-path */
1638c2ecf20Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
1648c2ecf20Sopenharmony_ci	} else {
1658c2ecf20Sopenharmony_ci		page = page_pool_refill_alloc_cache(pool);
1668c2ecf20Sopenharmony_ci	}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	return page;
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cistatic void page_pool_dma_sync_for_device(struct page_pool *pool,
1728c2ecf20Sopenharmony_ci					  struct page *page,
1738c2ecf20Sopenharmony_ci					  unsigned int dma_sync_size)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	dma_sync_size = min(dma_sync_size, pool->p.max_len);
1788c2ecf20Sopenharmony_ci	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
1798c2ecf20Sopenharmony_ci					 pool->p.offset, dma_sync_size,
1808c2ecf20Sopenharmony_ci					 pool->p.dma_dir);
1818c2ecf20Sopenharmony_ci}
1828c2ecf20Sopenharmony_ci
/* slow path: allocate a fresh page from the buddy allocator, DMA-map
 * it if the pool was configured with PP_FLAG_DMA_MAP, and account it
 * as in-flight.  Returns NULL on allocation or mapping failure.
 */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t _gfp)
{
	struct page *page;
	gfp_t gfp = _gfp;
	dma_addr_t dma;

	/* We could always set __GFP_COMP, and avoid this branch, as
	 * prep_new_page() can handle order-0 with __GFP_COMP.
	 */
	if (pool->p.order)
		gfp |= __GFP_COMP;

	/* FUTURE development:
	 *
	 * Current slow-path essentially falls back to single page
	 * allocations, which doesn't improve performance.  This code
	 * need bulk allocation support from the page allocator code.
	 */

	/* Cache was empty, do real allocation */
#ifdef CONFIG_NUMA
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
#else
	page = alloc_pages(gfp, pool->p.order);
#endif
	if (!page)
		return NULL;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		goto skip_dma_map;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
	 * into page private data (i.e 32bit cpu with 64bit DMA caps)
	 * This mapping is kept for lifetime of page, until leaving pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(pool->p.dev, dma)) {
		/* Mapping failed: give the page straight back */
		put_page(page);
		return NULL;
	}
	page_pool_set_dma_addr(page, dma);

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

skip_dma_map:
	/* Track how many pages are held 'in-flight' */
	pool->pages_state_hold_cnt++;

	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);

	/* When page just alloc'ed is should/must have refcnt 1. */
	return page;
}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci/* For using page_pool replace: alloc_pages() API calls, but provide
2458c2ecf20Sopenharmony_ci * synchronization guarantee for allocation side.
2468c2ecf20Sopenharmony_ci */
2478c2ecf20Sopenharmony_cistruct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
2488c2ecf20Sopenharmony_ci{
2498c2ecf20Sopenharmony_ci	struct page *page;
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	/* Fast-path: Get a page from cache */
2528c2ecf20Sopenharmony_ci	page = __page_pool_get_cached(pool);
2538c2ecf20Sopenharmony_ci	if (page)
2548c2ecf20Sopenharmony_ci		return page;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	/* Slow-path: cache empty, do real allocation */
2578c2ecf20Sopenharmony_ci	page = __page_pool_alloc_pages_slow(pool, gfp);
2588c2ecf20Sopenharmony_ci	return page;
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ciEXPORT_SYMBOL(page_pool_alloc_pages);
2618c2ecf20Sopenharmony_ci
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
 *  https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
 */
#define _distance(a, b)	(s32)((a) - (b))

/* Number of pages currently held outside the pool (hold minus release,
 * using wrap-safe serial-number arithmetic).  Negative is a bug.
 */
static s32 page_pool_inflight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 inflight;

	inflight = _distance(hold_cnt, release_cnt);

	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);

	return inflight;
}
2808c2ecf20Sopenharmony_ci
/* Disconnects a page (from a page_pool).  API users can have a need
 * to disconnect a page (from a page_pool), to allow it to be used as
 * a regular page (that will eventually be returned to the normal
 * page-allocator via put_page).
 */
void page_pool_release_page(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;
	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		goto skip_dma_unmap;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC);
	/* Clear the stored address so stale mappings can't be reused */
	page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);
}
EXPORT_SYMBOL(page_pool_release_page);
3128c2ecf20Sopenharmony_ci
/* Return a page to the page allocator, cleaning up our state */
static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	/* Unmap and account the release before dropping the refcnt */
	page_pool_release_page(pool, page);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_cistatic bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
3268c2ecf20Sopenharmony_ci{
3278c2ecf20Sopenharmony_ci	int ret;
3288c2ecf20Sopenharmony_ci	/* BH protection not needed if current is serving softirq */
3298c2ecf20Sopenharmony_ci	if (in_serving_softirq())
3308c2ecf20Sopenharmony_ci		ret = ptr_ring_produce(&pool->ring, page);
3318c2ecf20Sopenharmony_ci	else
3328c2ecf20Sopenharmony_ci		ret = ptr_ring_produce_bh(&pool->ring, page);
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	return (ret == 0) ? true : false;
3358c2ecf20Sopenharmony_ci}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci/* Only allow direct recycling in special circumstances, into the
3388c2ecf20Sopenharmony_ci * alloc side cache.  E.g. during RX-NAPI processing for XDP_DROP use-case.
3398c2ecf20Sopenharmony_ci *
3408c2ecf20Sopenharmony_ci * Caller must provide appropriate safe context.
3418c2ecf20Sopenharmony_ci */
3428c2ecf20Sopenharmony_cistatic bool page_pool_recycle_in_cache(struct page *page,
3438c2ecf20Sopenharmony_ci				       struct page_pool *pool)
3448c2ecf20Sopenharmony_ci{
3458c2ecf20Sopenharmony_ci	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
3468c2ecf20Sopenharmony_ci		return false;
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
3498c2ecf20Sopenharmony_ci	pool->alloc.cache[pool->alloc.count++] = page;
3508c2ecf20Sopenharmony_ci	return true;
3518c2ecf20Sopenharmony_ci}
3528c2ecf20Sopenharmony_ci
/* page is NOT reusable when:
 * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
 *    Such emergency-reserve pages must go back to the allocator ASAP.
 */
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
	return !page_is_pfmemalloc(page);
}
3608c2ecf20Sopenharmony_ci
/* If the page refcnt == 1, this will try to recycle the page.
 * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->max_len).
 * If the page refcnt != 1, then the page will be returned to memory
 * subsystem.
 */
void page_pool_put_page(struct page_pool *pool, struct page *page,
			unsigned int dma_sync_size, bool allow_direct)
{
	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but have fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns page, and can recycle it.
	 */
	if (likely(page_ref_count(page) == 1 &&
		   pool_page_reusable(pool, page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

		/* Fastest path: lockless alloc-cache (softirq-only) */
		if (allow_direct && in_serving_softirq())
			if (page_pool_recycle_in_cache(page, pool))
				return;

		if (!page_pool_recycle_in_ring(pool, page)) {
			/* Cache full, fallback to free pages */
			page_pool_return_page(pool, page);
		}
		return;
	}
	/* Fallback/non-XDP mode: API user have elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In-case page_pool maintains the DMA mapping, API user must
	 * call page_pool_put_page once.  In this elevated refcnt
	 * case, the DMA is unmapped/released, as driver is likely
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	/* Do not replace this with page_pool_return_page() */
	page_pool_release_page(pool, page);
	put_page(page);
}
EXPORT_SYMBOL(page_pool_put_page);
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_cistatic void page_pool_empty_ring(struct page_pool *pool)
4148c2ecf20Sopenharmony_ci{
4158c2ecf20Sopenharmony_ci	struct page *page;
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	/* Empty recycle ring */
4188c2ecf20Sopenharmony_ci	while ((page = ptr_ring_consume_bh(&pool->ring))) {
4198c2ecf20Sopenharmony_ci		/* Verify the refcnt invariant of cached pages */
4208c2ecf20Sopenharmony_ci		if (!(page_ref_count(page) == 1))
4218c2ecf20Sopenharmony_ci			pr_crit("%s() page_pool refcnt %d violation\n",
4228c2ecf20Sopenharmony_ci				__func__, page_ref_count(page));
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci		page_pool_return_page(pool, page);
4258c2ecf20Sopenharmony_ci	}
4268c2ecf20Sopenharmony_ci}
4278c2ecf20Sopenharmony_ci
/* Final teardown: notify the disconnect hook, destroy the ring, drop
 * the device reference taken at init, and free the pool itself.
 * Must only run once no pages are in-flight.
 */
static void page_pool_free(struct page_pool *pool)
{
	if (pool->disconnect)
		pool->disconnect(pool);

	ptr_ring_cleanup(&pool->ring, NULL);

	/* Balances the get_device() done in page_pool_init() */
	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

	kfree(pool);
}
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_cistatic void page_pool_empty_alloc_cache_once(struct page_pool *pool)
4428c2ecf20Sopenharmony_ci{
4438c2ecf20Sopenharmony_ci	struct page *page;
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci	if (pool->destroy_cnt)
4468c2ecf20Sopenharmony_ci		return;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	/* Empty alloc cache, assume caller made sure this is
4498c2ecf20Sopenharmony_ci	 * no-longer in use, and page_pool_alloc_pages() cannot be
4508c2ecf20Sopenharmony_ci	 * call concurrently.
4518c2ecf20Sopenharmony_ci	 */
4528c2ecf20Sopenharmony_ci	while (pool->alloc.count) {
4538c2ecf20Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
4548c2ecf20Sopenharmony_ci		page_pool_return_page(pool, page);
4558c2ecf20Sopenharmony_ci	}
4568c2ecf20Sopenharmony_ci}
4578c2ecf20Sopenharmony_ci
/* Release every page the pool itself still holds (alloc cache + ring)
 * and bump destroy_cnt so repeated scrubs skip the alloc cache.
 */
static void page_pool_scrub(struct page_pool *pool)
{
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}
4688c2ecf20Sopenharmony_ci
/* Scrub the pool and free it when nothing is left in-flight.
 * Returns the number of pages still held outside the pool.
 */
static int page_pool_release(struct page_pool *pool)
{
	int busy;

	page_pool_scrub(pool);
	busy = page_pool_inflight(pool);

	/* Safe to free: every page has been accounted back */
	if (!busy)
		page_pool_free(pool);

	return busy;
}
4808c2ecf20Sopenharmony_ci
/* Deferred-work handler: keep retrying shutdown while pages remain
 * in-flight, warning periodically about the stall.
 */
static void page_pool_release_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

	/* Periodic warning */
	if (time_after_eq(jiffies, pool->defer_warn)) {
		/* Wrap-safe: compute elapsed seconds via u32 subtraction */
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
			__func__, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
5038c2ecf20Sopenharmony_ci
/* Register an XDP memory-model user: take a user reference on the pool
 * and install the callback invoked on final teardown.
 */
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
}
5098c2ecf20Sopenharmony_ci
/* Drop the caller's reference on the pool.  When it is the last user
 * reference, try an immediate release; if pages are still in-flight,
 * hand off to delayed work that retries (and warns) until done.
 */
void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	/* Other users (e.g. xdp mem model) still hold references */
	if (!page_pool_put(pool))
		return;

	/* Released synchronously: nothing was in-flight */
	if (!page_pool_release(pool))
		return;

	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci/* Caller must provide appropriate safe context, e.g. NAPI. */
5308c2ecf20Sopenharmony_civoid page_pool_update_nid(struct page_pool *pool, int new_nid)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	struct page *page;
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	trace_page_pool_update_nid(pool, new_nid);
5358c2ecf20Sopenharmony_ci	pool->p.nid = new_nid;
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	/* Flush pool alloc cache, as refill will check NUMA node */
5388c2ecf20Sopenharmony_ci	while (pool->alloc.count) {
5398c2ecf20Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
5408c2ecf20Sopenharmony_ci		page_pool_return_page(pool, page);
5418c2ecf20Sopenharmony_ci	}
5428c2ecf20Sopenharmony_ci}
5438c2ecf20Sopenharmony_ciEXPORT_SYMBOL(page_pool_update_nid);
544