162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci *
362306a36Sopenharmony_ci * page_pool.c
462306a36Sopenharmony_ci *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
562306a36Sopenharmony_ci *	Copyright (C) 2016 Red Hat, Inc.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/types.h>
962306a36Sopenharmony_ci#include <linux/kernel.h>
1062306a36Sopenharmony_ci#include <linux/slab.h>
1162306a36Sopenharmony_ci#include <linux/device.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <net/page_pool/helpers.h>
1462306a36Sopenharmony_ci#include <net/xdp.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/dma-direction.h>
1762306a36Sopenharmony_ci#include <linux/dma-mapping.h>
1862306a36Sopenharmony_ci#include <linux/page-flags.h>
1962306a36Sopenharmony_ci#include <linux/mm.h> /* for put_page() */
2062306a36Sopenharmony_ci#include <linux/poison.h>
2162306a36Sopenharmony_ci#include <linux/ethtool.h>
2262306a36Sopenharmony_ci#include <linux/netdevice.h>
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci#include <trace/events/page_pool.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#define DEFER_TIME (msecs_to_jiffies(1000))
2762306a36Sopenharmony_ci#define DEFER_WARN_INTERVAL (60 * HZ)
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#define BIAS_MAX	LONG_MAX
3062306a36Sopenharmony_ci
#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
#define recycle_stat_inc(pool, __stat)							\
	do {										\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
		this_cpu_inc(s->__stat);						\
	} while (0)

/* recycle_stat_add is the bulk counterpart of recycle_stat_inc: it adds
 * @val to the per-cpu recycle counter, and is likewise safe to use when
 * preemption is possible.
 */
#define recycle_stat_add(pool, __stat, val)						\
	do {										\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
		this_cpu_add(s->__stat, val);						\
	} while (0)
/* ethtool string names for the per-pool statistics.
 * NOTE: the order of entries here MUST match the order in which values
 * are emitted by page_pool_ethtool_stats_get().
 */
static const char pp_stats[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/**
6262306a36Sopenharmony_ci * page_pool_get_stats() - fetch page pool stats
6362306a36Sopenharmony_ci * @pool:	pool from which page was allocated
6462306a36Sopenharmony_ci * @stats:	struct page_pool_stats to fill in
6562306a36Sopenharmony_ci *
6662306a36Sopenharmony_ci * Retrieve statistics about the page_pool. This API is only available
6762306a36Sopenharmony_ci * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
6862306a36Sopenharmony_ci * A pointer to a caller allocated struct page_pool_stats structure
6962306a36Sopenharmony_ci * is passed to this API which is filled in. The caller can then report
7062306a36Sopenharmony_ci * those stats to the user (perhaps via ethtool, debugfs, etc.).
7162306a36Sopenharmony_ci */
7262306a36Sopenharmony_cibool page_pool_get_stats(struct page_pool *pool,
7362306a36Sopenharmony_ci			 struct page_pool_stats *stats)
7462306a36Sopenharmony_ci{
7562306a36Sopenharmony_ci	int cpu = 0;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	if (!stats)
7862306a36Sopenharmony_ci		return false;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	/* The caller is responsible to initialize stats. */
8162306a36Sopenharmony_ci	stats->alloc_stats.fast += pool->alloc_stats.fast;
8262306a36Sopenharmony_ci	stats->alloc_stats.slow += pool->alloc_stats.slow;
8362306a36Sopenharmony_ci	stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
8462306a36Sopenharmony_ci	stats->alloc_stats.empty += pool->alloc_stats.empty;
8562306a36Sopenharmony_ci	stats->alloc_stats.refill += pool->alloc_stats.refill;
8662306a36Sopenharmony_ci	stats->alloc_stats.waive += pool->alloc_stats.waive;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
8962306a36Sopenharmony_ci		const struct page_pool_recycle_stats *pcpu =
9062306a36Sopenharmony_ci			per_cpu_ptr(pool->recycle_stats, cpu);
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci		stats->recycle_stats.cached += pcpu->cached;
9362306a36Sopenharmony_ci		stats->recycle_stats.cache_full += pcpu->cache_full;
9462306a36Sopenharmony_ci		stats->recycle_stats.ring += pcpu->ring;
9562306a36Sopenharmony_ci		stats->recycle_stats.ring_full += pcpu->ring_full;
9662306a36Sopenharmony_ci		stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
9762306a36Sopenharmony_ci	}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	return true;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_get_stats);
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ciu8 *page_pool_ethtool_stats_get_strings(u8 *data)
10462306a36Sopenharmony_ci{
10562306a36Sopenharmony_ci	int i;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
10862306a36Sopenharmony_ci		memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
10962306a36Sopenharmony_ci		data += ETH_GSTRING_LEN;
11062306a36Sopenharmony_ci	}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	return data;
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);
11562306a36Sopenharmony_ci
/* Number of stats exported via ethtool; matches both the string table
 * filled by page_pool_ethtool_stats_get_strings() and the values
 * written by page_pool_ethtool_stats_get().
 */
int page_pool_ethtool_stats_get_count(void)
{
	return ARRAY_SIZE(pp_stats);
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ciu64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	struct page_pool_stats *pool_stats = stats;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.fast;
12762306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.slow;
12862306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.slow_high_order;
12962306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.empty;
13062306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.refill;
13162306a36Sopenharmony_ci	*data++ = pool_stats->alloc_stats.waive;
13262306a36Sopenharmony_ci	*data++ = pool_stats->recycle_stats.cached;
13362306a36Sopenharmony_ci	*data++ = pool_stats->recycle_stats.cache_full;
13462306a36Sopenharmony_ci	*data++ = pool_stats->recycle_stats.ring;
13562306a36Sopenharmony_ci	*data++ = pool_stats->recycle_stats.ring_full;
13662306a36Sopenharmony_ci	*data++ = pool_stats->recycle_stats.released_refcnt;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	return data;
13962306a36Sopenharmony_ci}
14062306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_ethtool_stats_get);
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci#else
14362306a36Sopenharmony_ci#define alloc_stat_inc(pool, __stat)
14462306a36Sopenharmony_ci#define recycle_stat_inc(pool, __stat)
14562306a36Sopenharmony_ci#define recycle_stat_add(pool, __stat, val)
14662306a36Sopenharmony_ci#endif
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_cistatic bool page_pool_producer_lock(struct page_pool *pool)
14962306a36Sopenharmony_ci	__acquires(&pool->ring.producer_lock)
15062306a36Sopenharmony_ci{
15162306a36Sopenharmony_ci	bool in_softirq = in_softirq();
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	if (in_softirq)
15462306a36Sopenharmony_ci		spin_lock(&pool->ring.producer_lock);
15562306a36Sopenharmony_ci	else
15662306a36Sopenharmony_ci		spin_lock_bh(&pool->ring.producer_lock);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	return in_softirq;
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic void page_pool_producer_unlock(struct page_pool *pool,
16262306a36Sopenharmony_ci				      bool in_softirq)
16362306a36Sopenharmony_ci	__releases(&pool->ring.producer_lock)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	if (in_softirq)
16662306a36Sopenharmony_ci		spin_unlock(&pool->ring.producer_lock);
16762306a36Sopenharmony_ci	else
16862306a36Sopenharmony_ci		spin_unlock_bh(&pool->ring.producer_lock);
16962306a36Sopenharmony_ci}
17062306a36Sopenharmony_ci
/* Validate @params and initialize an already-allocated @pool: copy the
 * parameters, allocate the per-cpu recycle stats (when
 * CONFIG_PAGE_POOL_STATS is set), set up the ptr_ring, and take a
 * reference on the DMA device when mappings will be created.
 *
 * Return: 0 on success, -EINVAL/-E2BIG for bad parameters, -ENOMEM on
 * allocation failure.  On failure nothing needs to be torn down by the
 * caller beyond freeing @pool itself.
 */
static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
	 * which is the XDP_TX use-case.
	 */
	if (pool->p.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;
	}

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		if (!pool->p.max_len)
			return -EINVAL;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	/* NOTE(review): PAGE_POOL_DMA_USE_PP_FRAG_COUNT presumably means
	 * the DMA address storage overlaps the frag count field, making
	 * DMA mapping and page fragmenting mutually exclusive — confirm
	 * against the page_pool headers.
	 */
	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
	    pool->p.flags & PP_FLAG_PAGE_FRAG)
		return -EINVAL;

#ifdef CONFIG_PAGE_POOL_STATS
	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
	if (!pool->recycle_stats)
		return -ENOMEM;
#endif

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
		/* Undo the percpu stats allocation made above */
		free_percpu(pool->recycle_stats);
#endif
		return -ENOMEM;
	}

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* Driver calling page_pool_create() also calls page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	/* Keep the device alive while pages may hold DMA mappings to it */
	if (pool->p.flags & PP_FLAG_DMA_MAP)
		get_device(pool->p.dev);

	return 0;
}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci/**
24362306a36Sopenharmony_ci * page_pool_create() - create a page pool.
24462306a36Sopenharmony_ci * @params: parameters, see struct page_pool_params
24562306a36Sopenharmony_ci */
24662306a36Sopenharmony_cistruct page_pool *page_pool_create(const struct page_pool_params *params)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	struct page_pool *pool;
24962306a36Sopenharmony_ci	int err;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
25262306a36Sopenharmony_ci	if (!pool)
25362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	err = page_pool_init(pool, params);
25662306a36Sopenharmony_ci	if (err < 0) {
25762306a36Sopenharmony_ci		pr_warn("%s() gave up with errno %d\n", __func__, err);
25862306a36Sopenharmony_ci		kfree(pool);
25962306a36Sopenharmony_ci		return ERR_PTR(err);
26062306a36Sopenharmony_ci	}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	return pool;
26362306a36Sopenharmony_ci}
26462306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_create);
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_cistatic void page_pool_return_page(struct page_pool *pool, struct page *page);
26762306a36Sopenharmony_ci
/* Refill the alloc-side cache from the ptr_ring and hand one page back
 * to the caller.  Slow-path helper for __page_pool_get_cached(); relies
 * on the caller's softirq context for exclusive alloc-cache access.
 * Returns NULL when the ring is empty or a NUMA mismatch is hit.
 */
noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r)) {
		alloc_stat_inc(pool, empty);
		return NULL;
	}

	/* Softirq guarantee CPU and thus NUMA node is stable. This,
	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch;
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fallthrough to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			alloc_stat_inc(pool, waive);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, refill);
	}

	return page;
}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci/* fast path */
32262306a36Sopenharmony_cistatic struct page *__page_pool_get_cached(struct page_pool *pool)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	struct page *page;
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
32762306a36Sopenharmony_ci	if (likely(pool->alloc.count)) {
32862306a36Sopenharmony_ci		/* Fast-path */
32962306a36Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
33062306a36Sopenharmony_ci		alloc_stat_inc(pool, fast);
33162306a36Sopenharmony_ci	} else {
33262306a36Sopenharmony_ci		page = page_pool_refill_alloc_cache(pool);
33362306a36Sopenharmony_ci	}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	return page;
33662306a36Sopenharmony_ci}
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_cistatic void page_pool_dma_sync_for_device(struct page_pool *pool,
33962306a36Sopenharmony_ci					  struct page *page,
34062306a36Sopenharmony_ci					  unsigned int dma_sync_size)
34162306a36Sopenharmony_ci{
34262306a36Sopenharmony_ci	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	dma_sync_size = min(dma_sync_size, pool->p.max_len);
34562306a36Sopenharmony_ci	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
34662306a36Sopenharmony_ci					 pool->p.offset, dma_sync_size,
34762306a36Sopenharmony_ci					 pool->p.dma_dir);
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ci
/* Map @page for DMA and stash the resulting address in the struct page.
 *
 * Return: true on success; false when mapping failed (caller still owns
 * the page and must release it, e.g. via put_page()).
 */
static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
	 * into page private data (i.e 32bit cpu with 64bit DMA caps)
	 * This mapping is kept for lifetime of page, until leaving pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
						  DMA_ATTR_WEAK_ORDERING);
	if (dma_mapping_error(pool->p.dev, dma))
		return false;

	page_pool_set_dma_addr(page, dma);

	/* CPU sync was skipped at map time; hand the full usable area to
	 * the device now when the pool requests device syncs.
	 */
	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

	return true;
}
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_cistatic void page_pool_set_pp_info(struct page_pool *pool,
37562306a36Sopenharmony_ci				  struct page *page)
37662306a36Sopenharmony_ci{
37762306a36Sopenharmony_ci	page->pp = pool;
37862306a36Sopenharmony_ci	page->pp_magic |= PP_SIGNATURE;
37962306a36Sopenharmony_ci	if (pool->p.init_callback)
38062306a36Sopenharmony_ci		pool->p.init_callback(page, pool->p.init_arg);
38162306a36Sopenharmony_ci}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_cistatic void page_pool_clear_pp_info(struct page *page)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	page->pp_magic = 0;
38662306a36Sopenharmony_ci	page->pp = NULL;
38762306a36Sopenharmony_ci}
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_cistatic struct page *__page_pool_alloc_page_order(struct page_pool *pool,
39062306a36Sopenharmony_ci						 gfp_t gfp)
39162306a36Sopenharmony_ci{
39262306a36Sopenharmony_ci	struct page *page;
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	gfp |= __GFP_COMP;
39562306a36Sopenharmony_ci	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
39662306a36Sopenharmony_ci	if (unlikely(!page))
39762306a36Sopenharmony_ci		return NULL;
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
40062306a36Sopenharmony_ci	    unlikely(!page_pool_dma_map(pool, page))) {
40162306a36Sopenharmony_ci		put_page(page);
40262306a36Sopenharmony_ci		return NULL;
40362306a36Sopenharmony_ci	}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	alloc_stat_inc(pool, slow_high_order);
40662306a36Sopenharmony_ci	page_pool_set_pp_info(pool, page);
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	/* Track how many pages are held 'in-flight' */
40962306a36Sopenharmony_ci	pool->pages_state_hold_cnt++;
41062306a36Sopenharmony_ci	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
41162306a36Sopenharmony_ci	return page;
41262306a36Sopenharmony_ci}
41362306a36Sopenharmony_ci
/* slow path: bulk-allocate order-0 pages from the page allocator into
 * the (empty) alloc-side cache, DMA-map them as required, and return
 * one page to the caller.  High-order pools fall back to single-page
 * allocation.
 */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t gfp)
{
	const int bulk = PP_ALLOC_CACHE_REFILL;
	unsigned int pp_flags = pool->p.flags;
	unsigned int pp_order = pool->p.order;
	struct page *page;
	int i, nr_pages;

	/* Don't support bulk alloc for high-order pages */
	if (unlikely(pp_order))
		return __page_pool_alloc_page_order(pool, gfp);

	/* Unnecessary as alloc cache is empty, but guarantees zero count */
	if (unlikely(pool->alloc.count > 0))
		return pool->alloc.cache[--pool->alloc.count];

	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);

	nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
					       pool->alloc.cache);
	if (unlikely(!nr_pages))
		return NULL;

	/* Pages have been filled into alloc.cache array, but count is zero and
	 * page element have not been (possibly) DMA mapped.
	 */
	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
		/* Pages that fail DMA mapping are dropped here and never
		 * enter the cache, so in-flight accounting is untouched.
		 */
		if ((pp_flags & PP_FLAG_DMA_MAP) &&
		    unlikely(!page_pool_dma_map(pool, page))) {
			put_page(page);
			continue;
		}

		page_pool_set_pp_info(pool, page);
		pool->alloc.cache[pool->alloc.count++] = page;
		/* Track how many pages are held 'in-flight' */
		pool->pages_state_hold_cnt++;
		trace_page_pool_state_hold(pool, page,
					   pool->pages_state_hold_cnt);
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, slow);
	} else {
		page = NULL;
	}

	/* When page just alloc'ed it should/must have refcnt 1. */
	return page;
}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci/* For using page_pool replace: alloc_pages() API calls, but provide
47362306a36Sopenharmony_ci * synchronization guarantee for allocation side.
47462306a36Sopenharmony_ci */
47562306a36Sopenharmony_cistruct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
47662306a36Sopenharmony_ci{
47762306a36Sopenharmony_ci	struct page *page;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	/* Fast-path: Get a page from cache */
48062306a36Sopenharmony_ci	page = __page_pool_get_cached(pool);
48162306a36Sopenharmony_ci	if (page)
48262306a36Sopenharmony_ci		return page;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	/* Slow-path: cache empty, do real allocation */
48562306a36Sopenharmony_ci	page = __page_pool_alloc_pages_slow(pool, gfp);
48662306a36Sopenharmony_ci	return page;
48762306a36Sopenharmony_ci}
48862306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_alloc_pages);
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci/* Calculate distance between two u32 values, valid if distance is below 2^(31)
49162306a36Sopenharmony_ci *  https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
49262306a36Sopenharmony_ci */
49362306a36Sopenharmony_ci#define _distance(a, b)	(s32)((a) - (b))
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_cistatic s32 page_pool_inflight(struct page_pool *pool)
49662306a36Sopenharmony_ci{
49762306a36Sopenharmony_ci	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
49862306a36Sopenharmony_ci	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
49962306a36Sopenharmony_ci	s32 inflight;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	inflight = _distance(hold_cnt, release_cnt);
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
50462306a36Sopenharmony_ci	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	return inflight;
50762306a36Sopenharmony_ci}
50862306a36Sopenharmony_ci
/* Disconnects a page (from a page_pool).  API users can have a need
 * to disconnect a page (from a page_pool), to allow it to be used as
 * a regular page (that will eventually be returned to the normal
 * page-allocator via put_page).
 *
 * Unmaps DMA (when the pool maps), clears ownership markers, bumps the
 * release counter and drops the page reference.
 */
static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;
	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		goto skip_dma_unmap;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_cistatic bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
54962306a36Sopenharmony_ci{
55062306a36Sopenharmony_ci	int ret;
55162306a36Sopenharmony_ci	/* BH protection not needed if current is softirq */
55262306a36Sopenharmony_ci	if (in_softirq())
55362306a36Sopenharmony_ci		ret = ptr_ring_produce(&pool->ring, page);
55462306a36Sopenharmony_ci	else
55562306a36Sopenharmony_ci		ret = ptr_ring_produce_bh(&pool->ring, page);
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	if (!ret) {
55862306a36Sopenharmony_ci		recycle_stat_inc(pool, ring);
55962306a36Sopenharmony_ci		return true;
56062306a36Sopenharmony_ci	}
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	return false;
56362306a36Sopenharmony_ci}
56462306a36Sopenharmony_ci
/* Only allow direct recycling in special circumstances, into the
 * alloc side cache.  E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 *
 * Return: true if the page was cached, false when the cache is full
 * (caller should fall back to the ptr_ring).
 */
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
		recycle_stat_inc(pool, cache_full);
		return false;
	}

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	recycle_stat_inc(pool, cached);
	return true;
}
58362306a36Sopenharmony_ci
/* If the page refcnt == 1, this will try to recycle the page.
 * If PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->max_len).
 * If the page refcnt != 1, then the page will be returned to the memory
 * subsystem.
 *
 * Return: NULL when the page was consumed here (recycled into the
 * per-CPU alloc cache, or released back to the page allocator);
 * otherwise the page itself, meaning it is a candidate the caller
 * should continue recycling (e.g. into the ptr_ring).
 */
static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
		     unsigned int dma_sync_size, bool allow_direct)
{
	/* Recycling must never run from hard-IRQ context */
	lockdep_assert_no_hardirq();

	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but have fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns page, and can recycle it.
	 *
	 * page is NOT reusable when allocated when system is under
	 * some pressure. (page_is_pfmemalloc)
	 */
	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

		/* Direct recycle into the lockless cache is only legal
		 * when the caller allows it and we are in softirq.
		 */
		if (allow_direct && in_softirq() &&
		    page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
		return page;
	}
	/* Fallback/non-XDP mode: API user has an elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case page_pool maintains the DMA mapping, API user must
	 * call page_pool_put_page once.  In this elevated refcnt
	 * case, the DMA is unmapped/released, as driver is likely
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	recycle_stat_inc(pool, released_refcnt);
	page_pool_return_page(pool, page);

	return NULL;
}
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_civoid page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
63962306a36Sopenharmony_ci				  unsigned int dma_sync_size, bool allow_direct)
64062306a36Sopenharmony_ci{
64162306a36Sopenharmony_ci	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
64262306a36Sopenharmony_ci	if (page && !page_pool_recycle_in_ring(pool, page)) {
64362306a36Sopenharmony_ci		/* Cache full, fallback to free pages */
64462306a36Sopenharmony_ci		recycle_stat_inc(pool, ring_full);
64562306a36Sopenharmony_ci		page_pool_return_page(pool, page);
64662306a36Sopenharmony_ci	}
64762306a36Sopenharmony_ci}
64862306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_put_defragged_page);
64962306a36Sopenharmony_ci
/**
 * page_pool_put_page_bulk() - release references on multiple pages
 * @pool:	pool from which pages were allocated
 * @data:	array holding page pointers
 * @count:	number of pages in @data
 *
 * Tries to refill a number of pages into the ptr_ring cache while holding
 * the ptr_ring producer lock. If the ptr_ring is full,
 * page_pool_put_page_bulk() will release the leftover pages to the page
 * allocator.
 * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
 * completion loop for the XDP_REDIRECT use case.
 *
 * Please note the caller must not use the @data array after running
 * page_pool_put_page_bulk(), as this function overwrites it.
 */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
{
	int i, bulk_len = 0;
	bool in_softirq;

	/* Pass 1: filter which pages are approved for ptr_ring recycling.
	 * Non-candidates are recycled/released by __page_pool_put_page()
	 * itself; survivors are compacted to the front of @data.
	 */
	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		/* It is not the last user for the page frag case */
		if (!page_pool_is_last_frag(pool, page))
			continue;

		/* dma_sync_size of -1 means "sync the full max_len" */
		page = __page_pool_put_page(pool, page, -1, false);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

	if (unlikely(!bulk_len))
		return;

	/* Pass 2: bulk producer into ptr_ring page_pool cache, taking
	 * the producer lock only once for the whole batch.
	 */
	in_softirq = page_pool_producer_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i])) {
			/* ring full */
			recycle_stat_inc(pool, ring_full);
			break;
		}
	}
	recycle_stat_add(pool, ring, i);
	page_pool_producer_unlock(pool, in_softirq);

	/* Likely all pages were returned into the ptr_ring */
	if (likely(i == bulk_len))
		return;

	/* ptr_ring cache full, free remaining pages outside producer lock
	 * since put_page() with refcnt == 1 can be an expensive operation
	 */
	for (; i < bulk_len; i++)
		page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_cistatic struct page *page_pool_drain_frag(struct page_pool *pool,
71262306a36Sopenharmony_ci					 struct page *page)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	long drain_count = BIAS_MAX - pool->frag_users;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	/* Some user is still using the page frag */
71762306a36Sopenharmony_ci	if (likely(page_pool_defrag_page(page, drain_count)))
71862306a36Sopenharmony_ci		return NULL;
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
72162306a36Sopenharmony_ci		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
72262306a36Sopenharmony_ci			page_pool_dma_sync_for_device(pool, page, -1);
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci		return page;
72562306a36Sopenharmony_ci	}
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	page_pool_return_page(pool, page);
72862306a36Sopenharmony_ci	return NULL;
72962306a36Sopenharmony_ci}
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_cistatic void page_pool_free_frag(struct page_pool *pool)
73262306a36Sopenharmony_ci{
73362306a36Sopenharmony_ci	long drain_count = BIAS_MAX - pool->frag_users;
73462306a36Sopenharmony_ci	struct page *page = pool->frag_page;
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	pool->frag_page = NULL;
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	if (!page || page_pool_defrag_page(page, drain_count))
73962306a36Sopenharmony_ci		return;
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	page_pool_return_page(pool, page);
74262306a36Sopenharmony_ci}
74362306a36Sopenharmony_ci
struct page *page_pool_alloc_frag(struct page_pool *pool,
				  unsigned int *offset,
				  unsigned int size, gfp_t gfp)
{
	unsigned int max_size = PAGE_SIZE << pool->p.order;
	struct page *page = pool->frag_page;

	/* Frag API requires PP_FLAG_PAGE_FRAG, and the request must
	 * fit within one pool page (PAGE_SIZE << order).
	 */
	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
		    size > max_size))
		return NULL;

	/* Align so consecutive frags do not share DMA cache lines */
	size = ALIGN(size, dma_get_cache_alignment());
	*offset = pool->frag_offset;

	if (page && *offset + size > max_size) {
		/* Current page cannot fit this frag; try to drain it */
		page = page_pool_drain_frag(pool, page);
		if (page) {
			/* We were the last user: reuse the same page */
			alloc_stat_inc(pool, fast);
			goto frag_reset;
		}
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
		if (unlikely(!page)) {
			pool->frag_page = NULL;
			return NULL;
		}

		pool->frag_page = page;

frag_reset:
		pool->frag_users = 1;
		*offset = 0;
		pool->frag_offset = size;
		/* Pre-charge a large refcnt bias so that subsequent frag
		 * users only need to decrement on release.
		 */
		page_pool_fragment_page(page, BIAS_MAX);
		return page;
	}

	/* Carve the next frag out of the current page */
	pool->frag_users++;
	pool->frag_offset = *offset + size;
	alloc_stat_inc(pool, fast);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_cistatic void page_pool_empty_ring(struct page_pool *pool)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	struct page *page;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	/* Empty recycle ring */
79562306a36Sopenharmony_ci	while ((page = ptr_ring_consume_bh(&pool->ring))) {
79662306a36Sopenharmony_ci		/* Verify the refcnt invariant of cached pages */
79762306a36Sopenharmony_ci		if (!(page_ref_count(page) == 1))
79862306a36Sopenharmony_ci			pr_crit("%s() page_pool refcnt %d violation\n",
79962306a36Sopenharmony_ci				__func__, page_ref_count(page));
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci		page_pool_return_page(pool, page);
80262306a36Sopenharmony_ci	}
80362306a36Sopenharmony_ci}
80462306a36Sopenharmony_ci
/* Final teardown: only called once no pages are in flight */
static void page_pool_free(struct page_pool *pool)
{
	/* Notify the attached memory model (e.g. xdp) we are going away */
	if (pool->disconnect)
		pool->disconnect(pool);

	ptr_ring_cleanup(&pool->ring, NULL);

	/* Drop the device reference taken for DMA mapping */
	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

#ifdef CONFIG_PAGE_POOL_STATS
	free_percpu(pool->recycle_stats);
#endif
	kfree(pool);
}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_cistatic void page_pool_empty_alloc_cache_once(struct page_pool *pool)
82262306a36Sopenharmony_ci{
82362306a36Sopenharmony_ci	struct page *page;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	if (pool->destroy_cnt)
82662306a36Sopenharmony_ci		return;
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	/* Empty alloc cache, assume caller made sure this is
82962306a36Sopenharmony_ci	 * no-longer in use, and page_pool_alloc_pages() cannot be
83062306a36Sopenharmony_ci	 * call concurrently.
83162306a36Sopenharmony_ci	 */
83262306a36Sopenharmony_ci	while (pool->alloc.count) {
83362306a36Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
83462306a36Sopenharmony_ci		page_pool_return_page(pool, page);
83562306a36Sopenharmony_ci	}
83662306a36Sopenharmony_ci}
83762306a36Sopenharmony_ci
/* Release all pages the pool still holds in its caches.  May be
 * called repeatedly during a deferred shutdown.
 */
static void page_pool_scrub(struct page_pool *pool)
{
	/* Alloc cache is emptied only on the first pass (destroy_cnt) */
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}
84862306a36Sopenharmony_ci
/* Scrub caches and free the pool if nothing remains in flight.
 * Returns the number of pages still in flight.
 */
static int page_pool_release(struct page_pool *pool)
{
	int inflight;

	page_pool_scrub(pool);
	inflight = page_pool_inflight(pool);
	if (inflight == 0)
		page_pool_free(pool);

	return inflight;
}
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_cistatic void page_pool_release_retry(struct work_struct *wq)
86262306a36Sopenharmony_ci{
86362306a36Sopenharmony_ci	struct delayed_work *dwq = to_delayed_work(wq);
86462306a36Sopenharmony_ci	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
86562306a36Sopenharmony_ci	int inflight;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	inflight = page_pool_release(pool);
86862306a36Sopenharmony_ci	if (!inflight)
86962306a36Sopenharmony_ci		return;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	/* Periodic warning */
87262306a36Sopenharmony_ci	if (time_after_eq(jiffies, pool->defer_warn)) {
87362306a36Sopenharmony_ci		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
87662306a36Sopenharmony_ci			__func__, inflight, sec);
87762306a36Sopenharmony_ci		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
87862306a36Sopenharmony_ci	}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	/* Still not ready to be disconnected, retry later */
88162306a36Sopenharmony_ci	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
88262306a36Sopenharmony_ci}
88362306a36Sopenharmony_ci
/* Attach the pool to an XDP memory model: take a user reference and
 * record the disconnect callback invoked at teardown.
 */
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   struct xdp_mem_info *mem)
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
	pool->xdp_mem_id = mem->id;
}
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_civoid page_pool_unlink_napi(struct page_pool *pool)
89362306a36Sopenharmony_ci{
89462306a36Sopenharmony_ci	if (!pool->p.napi)
89562306a36Sopenharmony_ci		return;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	/* To avoid races with recycling and additional barriers make sure
89862306a36Sopenharmony_ci	 * pool and NAPI are unlinked when NAPI is disabled.
89962306a36Sopenharmony_ci	 */
90062306a36Sopenharmony_ci	WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) ||
90162306a36Sopenharmony_ci		READ_ONCE(pool->p.napi->list_owner) != -1);
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	WRITE_ONCE(pool->p.napi, NULL);
90462306a36Sopenharmony_ci}
90562306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_unlink_napi);
90662306a36Sopenharmony_ci
void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	/* Drop one user reference; only the last user tears down */
	if (!page_pool_put(pool))
		return;

	page_pool_unlink_napi(pool);
	page_pool_free_frag(pool);

	/* Zero in-flight pages: pool was freed synchronously */
	if (!page_pool_release(pool))
		return;

	/* Pages still in flight: poll periodically via delayed work
	 * until all are returned, warning if shutdown stalls.
	 */
	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci/* Caller must provide appropriate safe context, e.g. NAPI. */
93062306a36Sopenharmony_civoid page_pool_update_nid(struct page_pool *pool, int new_nid)
93162306a36Sopenharmony_ci{
93262306a36Sopenharmony_ci	struct page *page;
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	trace_page_pool_update_nid(pool, new_nid);
93562306a36Sopenharmony_ci	pool->p.nid = new_nid;
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	/* Flush pool alloc cache, as refill will check NUMA node */
93862306a36Sopenharmony_ci	while (pool->alloc.count) {
93962306a36Sopenharmony_ci		page = pool->alloc.cache[--pool->alloc.count];
94062306a36Sopenharmony_ci		page_pool_return_page(pool, page);
94162306a36Sopenharmony_ci	}
94262306a36Sopenharmony_ci}
94362306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_update_nid);
944