162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci * 362306a36Sopenharmony_ci * page_pool.c 462306a36Sopenharmony_ci * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> 562306a36Sopenharmony_ci * Copyright (C) 2016 Red Hat, Inc. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/types.h> 962306a36Sopenharmony_ci#include <linux/kernel.h> 1062306a36Sopenharmony_ci#include <linux/slab.h> 1162306a36Sopenharmony_ci#include <linux/device.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <net/page_pool/helpers.h> 1462306a36Sopenharmony_ci#include <net/xdp.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/dma-direction.h> 1762306a36Sopenharmony_ci#include <linux/dma-mapping.h> 1862306a36Sopenharmony_ci#include <linux/page-flags.h> 1962306a36Sopenharmony_ci#include <linux/mm.h> /* for put_page() */ 2062306a36Sopenharmony_ci#include <linux/poison.h> 2162306a36Sopenharmony_ci#include <linux/ethtool.h> 2262306a36Sopenharmony_ci#include <linux/netdevice.h> 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include <trace/events/page_pool.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#define DEFER_TIME (msecs_to_jiffies(1000)) 2762306a36Sopenharmony_ci#define DEFER_WARN_INTERVAL (60 * HZ) 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define BIAS_MAX LONG_MAX 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#ifdef CONFIG_PAGE_POOL_STATS 3262306a36Sopenharmony_ci/* alloc_stat_inc is intended to be used in softirq context */ 3362306a36Sopenharmony_ci#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) 3462306a36Sopenharmony_ci/* recycle_stat_inc is safe to use when preemption is possible. */ 3562306a36Sopenharmony_ci#define recycle_stat_inc(pool, __stat) \ 3662306a36Sopenharmony_ci do { \ 3762306a36Sopenharmony_ci struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ 3862306a36Sopenharmony_ci this_cpu_inc(s->__stat); \ 3962306a36Sopenharmony_ci } while (0) 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#define recycle_stat_add(pool, __stat, val) \ 4262306a36Sopenharmony_ci do { \ 4362306a36Sopenharmony_ci struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ 4462306a36Sopenharmony_ci this_cpu_add(s->__stat, val); \ 4562306a36Sopenharmony_ci } while (0) 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_cistatic const char pp_stats[][ETH_GSTRING_LEN] = { 4862306a36Sopenharmony_ci "rx_pp_alloc_fast", 4962306a36Sopenharmony_ci "rx_pp_alloc_slow", 5062306a36Sopenharmony_ci "rx_pp_alloc_slow_ho", 5162306a36Sopenharmony_ci "rx_pp_alloc_empty", 5262306a36Sopenharmony_ci "rx_pp_alloc_refill", 5362306a36Sopenharmony_ci "rx_pp_alloc_waive", 5462306a36Sopenharmony_ci "rx_pp_recycle_cached", 5562306a36Sopenharmony_ci "rx_pp_recycle_cache_full", 5662306a36Sopenharmony_ci "rx_pp_recycle_ring", 5762306a36Sopenharmony_ci "rx_pp_recycle_ring_full", 5862306a36Sopenharmony_ci "rx_pp_recycle_released_ref", 5962306a36Sopenharmony_ci}; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci/** 6262306a36Sopenharmony_ci * page_pool_get_stats() - fetch page pool stats 6362306a36Sopenharmony_ci * @pool: pool from which page was allocated 6462306a36Sopenharmony_ci * @stats: struct page_pool_stats to fill in 6562306a36Sopenharmony_ci * 6662306a36Sopenharmony_ci * Retrieve statistics about the page_pool. This API is only available 6762306a36Sopenharmony_ci * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``. 6862306a36Sopenharmony_ci * A pointer to a caller allocated struct page_pool_stats structure 6962306a36Sopenharmony_ci * is passed to this API which is filled in. The caller can then report 7062306a36Sopenharmony_ci * those stats to the user (perhaps via ethtool, debugfs, etc.). 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_cibool page_pool_get_stats(struct page_pool *pool, 7362306a36Sopenharmony_ci struct page_pool_stats *stats) 7462306a36Sopenharmony_ci{ 7562306a36Sopenharmony_ci int cpu = 0; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci if (!stats) 7862306a36Sopenharmony_ci return false; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci /* The caller is responsible to initialize stats. */ 8162306a36Sopenharmony_ci stats->alloc_stats.fast += pool->alloc_stats.fast; 8262306a36Sopenharmony_ci stats->alloc_stats.slow += pool->alloc_stats.slow; 8362306a36Sopenharmony_ci stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order; 8462306a36Sopenharmony_ci stats->alloc_stats.empty += pool->alloc_stats.empty; 8562306a36Sopenharmony_ci stats->alloc_stats.refill += pool->alloc_stats.refill; 8662306a36Sopenharmony_ci stats->alloc_stats.waive += pool->alloc_stats.waive; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 8962306a36Sopenharmony_ci const struct page_pool_recycle_stats *pcpu = 9062306a36Sopenharmony_ci per_cpu_ptr(pool->recycle_stats, cpu); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci stats->recycle_stats.cached += pcpu->cached; 9362306a36Sopenharmony_ci stats->recycle_stats.cache_full += pcpu->cache_full; 9462306a36Sopenharmony_ci stats->recycle_stats.ring += pcpu->ring; 9562306a36Sopenharmony_ci stats->recycle_stats.ring_full += pcpu->ring_full; 9662306a36Sopenharmony_ci stats->recycle_stats.released_refcnt += pcpu->released_refcnt; 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci return true; 10062306a36Sopenharmony_ci} 10162306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_get_stats); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ciu8 *page_pool_ethtool_stats_get_strings(u8 *data) 10462306a36Sopenharmony_ci{ 10562306a36Sopenharmony_ci int i; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { 10862306a36Sopenharmony_ci memcpy(data, pp_stats[i], ETH_GSTRING_LEN); 10962306a36Sopenharmony_ci data += ETH_GSTRING_LEN; 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci return data; 11362306a36Sopenharmony_ci} 11462306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ciint page_pool_ethtool_stats_get_count(void) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci return ARRAY_SIZE(pp_stats); 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_ethtool_stats_get_count); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ciu64 *page_pool_ethtool_stats_get(u64 *data, void *stats) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci struct page_pool_stats *pool_stats = stats; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.fast; 12762306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.slow; 12862306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.slow_high_order; 12962306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.empty; 13062306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.refill; 13162306a36Sopenharmony_ci *data++ = pool_stats->alloc_stats.waive; 13262306a36Sopenharmony_ci *data++ = pool_stats->recycle_stats.cached; 13362306a36Sopenharmony_ci *data++ = pool_stats->recycle_stats.cache_full; 13462306a36Sopenharmony_ci *data++ = pool_stats->recycle_stats.ring; 13562306a36Sopenharmony_ci *data++ = pool_stats->recycle_stats.ring_full; 13662306a36Sopenharmony_ci *data++ = pool_stats->recycle_stats.released_refcnt; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci return data; 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_ethtool_stats_get); 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci#else 14362306a36Sopenharmony_ci#define alloc_stat_inc(pool, __stat) 14462306a36Sopenharmony_ci#define recycle_stat_inc(pool, __stat) 14562306a36Sopenharmony_ci#define recycle_stat_add(pool, __stat, val) 14662306a36Sopenharmony_ci#endif 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_cistatic bool page_pool_producer_lock(struct page_pool *pool) 14962306a36Sopenharmony_ci __acquires(&pool->ring.producer_lock) 15062306a36Sopenharmony_ci{ 15162306a36Sopenharmony_ci bool in_softirq = in_softirq(); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci if (in_softirq) 15462306a36Sopenharmony_ci spin_lock(&pool->ring.producer_lock); 15562306a36Sopenharmony_ci else 15662306a36Sopenharmony_ci spin_lock_bh(&pool->ring.producer_lock); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci return in_softirq; 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cistatic void page_pool_producer_unlock(struct page_pool *pool, 16262306a36Sopenharmony_ci bool in_softirq) 16362306a36Sopenharmony_ci __releases(&pool->ring.producer_lock) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci if (in_softirq) 16662306a36Sopenharmony_ci spin_unlock(&pool->ring.producer_lock); 16762306a36Sopenharmony_ci else 16862306a36Sopenharmony_ci spin_unlock_bh(&pool->ring.producer_lock); 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_cistatic int page_pool_init(struct page_pool *pool, 17262306a36Sopenharmony_ci const struct page_pool_params *params) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci unsigned int ring_qsize = 1024; /* Default */ 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci memcpy(&pool->p, params, sizeof(pool->p)); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci /* Validate only known flags were used */ 17962306a36Sopenharmony_ci if (pool->p.flags & ~(PP_FLAG_ALL)) 18062306a36Sopenharmony_ci return -EINVAL; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci if (pool->p.pool_size) 18362306a36Sopenharmony_ci ring_qsize = pool->p.pool_size; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* Sanity limit mem that can be pinned down */ 18662306a36Sopenharmony_ci if (ring_qsize > 32768) 18762306a36Sopenharmony_ci return -E2BIG; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. 19062306a36Sopenharmony_ci * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, 19162306a36Sopenharmony_ci * which is the XDP_TX use-case. 19262306a36Sopenharmony_ci */ 19362306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_MAP) { 19462306a36Sopenharmony_ci if ((pool->p.dma_dir != DMA_FROM_DEVICE) && 19562306a36Sopenharmony_ci (pool->p.dma_dir != DMA_BIDIRECTIONAL)) 19662306a36Sopenharmony_ci return -EINVAL; 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { 20062306a36Sopenharmony_ci /* In order to request DMA-sync-for-device the page 20162306a36Sopenharmony_ci * needs to be mapped 20262306a36Sopenharmony_ci */ 20362306a36Sopenharmony_ci if (!(pool->p.flags & PP_FLAG_DMA_MAP)) 20462306a36Sopenharmony_ci return -EINVAL; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci if (!pool->p.max_len) 20762306a36Sopenharmony_ci return -EINVAL; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci /* pool->p.offset has to be set according to the address 21062306a36Sopenharmony_ci * offset used by the DMA engine to start copying rx data 21162306a36Sopenharmony_ci */ 21262306a36Sopenharmony_ci } 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && 21562306a36Sopenharmony_ci pool->p.flags & PP_FLAG_PAGE_FRAG) 21662306a36Sopenharmony_ci return -EINVAL; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci#ifdef CONFIG_PAGE_POOL_STATS 21962306a36Sopenharmony_ci pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); 22062306a36Sopenharmony_ci if (!pool->recycle_stats) 22162306a36Sopenharmony_ci return -ENOMEM; 22262306a36Sopenharmony_ci#endif 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { 22562306a36Sopenharmony_ci#ifdef CONFIG_PAGE_POOL_STATS 22662306a36Sopenharmony_ci free_percpu(pool->recycle_stats); 22762306a36Sopenharmony_ci#endif 22862306a36Sopenharmony_ci return -ENOMEM; 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci atomic_set(&pool->pages_state_release_cnt, 0); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci /* Driver calling page_pool_create() also call page_pool_destroy() */ 23462306a36Sopenharmony_ci refcount_set(&pool->user_cnt, 1); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_MAP) 23762306a36Sopenharmony_ci get_device(pool->p.dev); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci return 0; 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci/** 24362306a36Sopenharmony_ci * page_pool_create() - create a page pool. 24462306a36Sopenharmony_ci * @params: parameters, see struct page_pool_params 24562306a36Sopenharmony_ci */ 24662306a36Sopenharmony_cistruct page_pool *page_pool_create(const struct page_pool_params *params) 24762306a36Sopenharmony_ci{ 24862306a36Sopenharmony_ci struct page_pool *pool; 24962306a36Sopenharmony_ci int err; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); 25262306a36Sopenharmony_ci if (!pool) 25362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci err = page_pool_init(pool, params); 25662306a36Sopenharmony_ci if (err < 0) { 25762306a36Sopenharmony_ci pr_warn("%s() gave up with errno %d\n", __func__, err); 25862306a36Sopenharmony_ci kfree(pool); 25962306a36Sopenharmony_ci return ERR_PTR(err); 26062306a36Sopenharmony_ci } 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci return pool; 26362306a36Sopenharmony_ci} 26462306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_create); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cistatic void page_pool_return_page(struct page_pool *pool, struct page *page); 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_cinoinline 26962306a36Sopenharmony_cistatic struct page *page_pool_refill_alloc_cache(struct page_pool *pool) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct ptr_ring *r = &pool->ring; 27262306a36Sopenharmony_ci struct page *page; 27362306a36Sopenharmony_ci int pref_nid; /* preferred NUMA node */ 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci /* Quicker fallback, avoid locks when ring is empty */ 27662306a36Sopenharmony_ci if (__ptr_ring_empty(r)) { 27762306a36Sopenharmony_ci alloc_stat_inc(pool, empty); 27862306a36Sopenharmony_ci return NULL; 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci /* Softirq guarantee CPU and thus NUMA node is stable. This, 28262306a36Sopenharmony_ci * assumes CPU refilling driver RX-ring will also run RX-NAPI. 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_ci#ifdef CONFIG_NUMA 28562306a36Sopenharmony_ci pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid; 28662306a36Sopenharmony_ci#else 28762306a36Sopenharmony_ci /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */ 28862306a36Sopenharmony_ci pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */ 28962306a36Sopenharmony_ci#endif 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci /* Refill alloc array, but only if NUMA match */ 29262306a36Sopenharmony_ci do { 29362306a36Sopenharmony_ci page = __ptr_ring_consume(r); 29462306a36Sopenharmony_ci if (unlikely(!page)) 29562306a36Sopenharmony_ci break; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (likely(page_to_nid(page) == pref_nid)) { 29862306a36Sopenharmony_ci pool->alloc.cache[pool->alloc.count++] = page; 29962306a36Sopenharmony_ci } else { 30062306a36Sopenharmony_ci /* NUMA mismatch; 30162306a36Sopenharmony_ci * (1) release 1 page to page-allocator and 30262306a36Sopenharmony_ci * (2) break out to fallthrough to alloc_pages_node. 30362306a36Sopenharmony_ci * This limit stress on page buddy alloactor. 30462306a36Sopenharmony_ci */ 30562306a36Sopenharmony_ci page_pool_return_page(pool, page); 30662306a36Sopenharmony_ci alloc_stat_inc(pool, waive); 30762306a36Sopenharmony_ci page = NULL; 30862306a36Sopenharmony_ci break; 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci /* Return last page */ 31362306a36Sopenharmony_ci if (likely(pool->alloc.count > 0)) { 31462306a36Sopenharmony_ci page = pool->alloc.cache[--pool->alloc.count]; 31562306a36Sopenharmony_ci alloc_stat_inc(pool, refill); 31662306a36Sopenharmony_ci } 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci return page; 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci/* fast path */ 32262306a36Sopenharmony_cistatic struct page *__page_pool_get_cached(struct page_pool *pool) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci struct page *page; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ 32762306a36Sopenharmony_ci if (likely(pool->alloc.count)) { 32862306a36Sopenharmony_ci /* Fast-path */ 32962306a36Sopenharmony_ci page = pool->alloc.cache[--pool->alloc.count]; 33062306a36Sopenharmony_ci alloc_stat_inc(pool, fast); 33162306a36Sopenharmony_ci } else { 33262306a36Sopenharmony_ci page = page_pool_refill_alloc_cache(pool); 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci return page; 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_cistatic void page_pool_dma_sync_for_device(struct page_pool *pool, 33962306a36Sopenharmony_ci struct page *page, 34062306a36Sopenharmony_ci unsigned int dma_sync_size) 34162306a36Sopenharmony_ci{ 34262306a36Sopenharmony_ci dma_addr_t dma_addr = page_pool_get_dma_addr(page); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci dma_sync_size = min(dma_sync_size, pool->p.max_len); 34562306a36Sopenharmony_ci dma_sync_single_range_for_device(pool->p.dev, dma_addr, 34662306a36Sopenharmony_ci pool->p.offset, dma_sync_size, 34762306a36Sopenharmony_ci pool->p.dma_dir); 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cistatic bool page_pool_dma_map(struct page_pool *pool, struct page *page) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci dma_addr_t dma; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci /* Setup DMA mapping: use 'struct page' area for storing DMA-addr 35562306a36Sopenharmony_ci * since dma_addr_t can be either 32 or 64 bits and does not always fit 35662306a36Sopenharmony_ci * into page private data (i.e 32bit cpu with 64bit DMA caps) 35762306a36Sopenharmony_ci * This mapping is kept for lifetime of page, until leaving pool. 35862306a36Sopenharmony_ci */ 35962306a36Sopenharmony_ci dma = dma_map_page_attrs(pool->p.dev, page, 0, 36062306a36Sopenharmony_ci (PAGE_SIZE << pool->p.order), 36162306a36Sopenharmony_ci pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | 36262306a36Sopenharmony_ci DMA_ATTR_WEAK_ORDERING); 36362306a36Sopenharmony_ci if (dma_mapping_error(pool->p.dev, dma)) 36462306a36Sopenharmony_ci return false; 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci page_pool_set_dma_addr(page, dma); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 36962306a36Sopenharmony_ci page_pool_dma_sync_for_device(pool, page, pool->p.max_len); 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci return true; 37262306a36Sopenharmony_ci} 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_cistatic void page_pool_set_pp_info(struct page_pool *pool, 37562306a36Sopenharmony_ci struct page *page) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci page->pp = pool; 37862306a36Sopenharmony_ci page->pp_magic |= PP_SIGNATURE; 37962306a36Sopenharmony_ci if (pool->p.init_callback) 38062306a36Sopenharmony_ci pool->p.init_callback(page, pool->p.init_arg); 38162306a36Sopenharmony_ci} 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_cistatic void page_pool_clear_pp_info(struct page *page) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci page->pp_magic = 0; 38662306a36Sopenharmony_ci page->pp = NULL; 38762306a36Sopenharmony_ci} 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_cistatic struct page *__page_pool_alloc_page_order(struct page_pool *pool, 39062306a36Sopenharmony_ci gfp_t gfp) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci struct page *page; 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci gfp |= __GFP_COMP; 39562306a36Sopenharmony_ci page = alloc_pages_node(pool->p.nid, gfp, pool->p.order); 39662306a36Sopenharmony_ci if (unlikely(!page)) 39762306a36Sopenharmony_ci return NULL; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci if ((pool->p.flags & PP_FLAG_DMA_MAP) && 40062306a36Sopenharmony_ci unlikely(!page_pool_dma_map(pool, page))) { 40162306a36Sopenharmony_ci put_page(page); 40262306a36Sopenharmony_ci return NULL; 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci alloc_stat_inc(pool, slow_high_order); 40662306a36Sopenharmony_ci page_pool_set_pp_info(pool, page); 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci /* Track how many pages are held 'in-flight' */ 40962306a36Sopenharmony_ci pool->pages_state_hold_cnt++; 41062306a36Sopenharmony_ci trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); 41162306a36Sopenharmony_ci return page; 41262306a36Sopenharmony_ci} 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci/* slow path */ 41562306a36Sopenharmony_cinoinline 41662306a36Sopenharmony_cistatic struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, 41762306a36Sopenharmony_ci gfp_t gfp) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci const int bulk = PP_ALLOC_CACHE_REFILL; 42062306a36Sopenharmony_ci unsigned int pp_flags = pool->p.flags; 42162306a36Sopenharmony_ci unsigned int pp_order = pool->p.order; 42262306a36Sopenharmony_ci struct page *page; 42362306a36Sopenharmony_ci int i, nr_pages; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci /* Don't support bulk alloc for high-order pages */ 42662306a36Sopenharmony_ci if (unlikely(pp_order)) 42762306a36Sopenharmony_ci return __page_pool_alloc_page_order(pool, gfp); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci /* Unnecessary as alloc cache is empty, but guarantees zero count */ 43062306a36Sopenharmony_ci if (unlikely(pool->alloc.count > 0)) 43162306a36Sopenharmony_ci return pool->alloc.cache[--pool->alloc.count]; 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ 43462306a36Sopenharmony_ci memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, 43762306a36Sopenharmony_ci pool->alloc.cache); 43862306a36Sopenharmony_ci if (unlikely(!nr_pages)) 43962306a36Sopenharmony_ci return NULL; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci /* Pages have been filled into alloc.cache array, but count is zero and 44262306a36Sopenharmony_ci * page element have not been (possibly) DMA mapped. 44362306a36Sopenharmony_ci */ 44462306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 44562306a36Sopenharmony_ci page = pool->alloc.cache[i]; 44662306a36Sopenharmony_ci if ((pp_flags & PP_FLAG_DMA_MAP) && 44762306a36Sopenharmony_ci unlikely(!page_pool_dma_map(pool, page))) { 44862306a36Sopenharmony_ci put_page(page); 44962306a36Sopenharmony_ci continue; 45062306a36Sopenharmony_ci } 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci page_pool_set_pp_info(pool, page); 45362306a36Sopenharmony_ci pool->alloc.cache[pool->alloc.count++] = page; 45462306a36Sopenharmony_ci /* Track how many pages are held 'in-flight' */ 45562306a36Sopenharmony_ci pool->pages_state_hold_cnt++; 45662306a36Sopenharmony_ci trace_page_pool_state_hold(pool, page, 45762306a36Sopenharmony_ci pool->pages_state_hold_cnt); 45862306a36Sopenharmony_ci } 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci /* Return last page */ 46162306a36Sopenharmony_ci if (likely(pool->alloc.count > 0)) { 46262306a36Sopenharmony_ci page = pool->alloc.cache[--pool->alloc.count]; 46362306a36Sopenharmony_ci alloc_stat_inc(pool, slow); 46462306a36Sopenharmony_ci } else { 46562306a36Sopenharmony_ci page = NULL; 46662306a36Sopenharmony_ci } 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci /* When page just alloc'ed is should/must have refcnt 1. */ 46962306a36Sopenharmony_ci return page; 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci/* For using page_pool replace: alloc_pages() API calls, but provide 47362306a36Sopenharmony_ci * synchronization guarantee for allocation side. 47462306a36Sopenharmony_ci */ 47562306a36Sopenharmony_cistruct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci struct page *page; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci /* Fast-path: Get a page from cache */ 48062306a36Sopenharmony_ci page = __page_pool_get_cached(pool); 48162306a36Sopenharmony_ci if (page) 48262306a36Sopenharmony_ci return page; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci /* Slow-path: cache empty, do real allocation */ 48562306a36Sopenharmony_ci page = __page_pool_alloc_pages_slow(pool, gfp); 48662306a36Sopenharmony_ci return page; 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_alloc_pages); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci/* Calculate distance between two u32 values, valid if distance is below 2^(31) 49162306a36Sopenharmony_ci * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution 49262306a36Sopenharmony_ci */ 49362306a36Sopenharmony_ci#define _distance(a, b) (s32)((a) - (b)) 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_cistatic s32 page_pool_inflight(struct page_pool *pool) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); 49862306a36Sopenharmony_ci u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); 49962306a36Sopenharmony_ci s32 inflight; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci inflight = _distance(hold_cnt, release_cnt); 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); 50462306a36Sopenharmony_ci WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci return inflight; 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci/* Disconnects a page (from a page_pool). API users can have a need 51062306a36Sopenharmony_ci * to disconnect a page (from a page_pool), to allow it to be used as 51162306a36Sopenharmony_ci * a regular page (that will eventually be returned to the normal 51262306a36Sopenharmony_ci * page-allocator via put_page). 51362306a36Sopenharmony_ci */ 51462306a36Sopenharmony_cistatic void page_pool_return_page(struct page_pool *pool, struct page *page) 51562306a36Sopenharmony_ci{ 51662306a36Sopenharmony_ci dma_addr_t dma; 51762306a36Sopenharmony_ci int count; 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (!(pool->p.flags & PP_FLAG_DMA_MAP)) 52062306a36Sopenharmony_ci /* Always account for inflight pages, even if we didn't 52162306a36Sopenharmony_ci * map them 52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_ci goto skip_dma_unmap; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci dma = page_pool_get_dma_addr(page); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci /* When page is unmapped, it cannot be returned to our pool */ 52862306a36Sopenharmony_ci dma_unmap_page_attrs(pool->p.dev, dma, 52962306a36Sopenharmony_ci PAGE_SIZE << pool->p.order, pool->p.dma_dir, 53062306a36Sopenharmony_ci DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 53162306a36Sopenharmony_ci page_pool_set_dma_addr(page, 0); 53262306a36Sopenharmony_ciskip_dma_unmap: 53362306a36Sopenharmony_ci page_pool_clear_pp_info(page); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci /* This may be the last page returned, releasing the pool, so 53662306a36Sopenharmony_ci * it is not safe to reference pool afterwards. 53762306a36Sopenharmony_ci */ 53862306a36Sopenharmony_ci count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); 53962306a36Sopenharmony_ci trace_page_pool_state_release(pool, page, count); 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci put_page(page); 54262306a36Sopenharmony_ci /* An optimization would be to call __free_pages(page, pool->p.order) 54362306a36Sopenharmony_ci * knowing page is not part of page-cache (thus avoiding a 54462306a36Sopenharmony_ci * __page_cache_release() call). 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistatic bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) 54962306a36Sopenharmony_ci{ 55062306a36Sopenharmony_ci int ret; 55162306a36Sopenharmony_ci /* BH protection not needed if current is softirq */ 55262306a36Sopenharmony_ci if (in_softirq()) 55362306a36Sopenharmony_ci ret = ptr_ring_produce(&pool->ring, page); 55462306a36Sopenharmony_ci else 55562306a36Sopenharmony_ci ret = ptr_ring_produce_bh(&pool->ring, page); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci if (!ret) { 55862306a36Sopenharmony_ci recycle_stat_inc(pool, ring); 55962306a36Sopenharmony_ci return true; 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci return false; 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci/* Only allow direct recycling in special circumstances, into the 56662306a36Sopenharmony_ci * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case. 56762306a36Sopenharmony_ci * 56862306a36Sopenharmony_ci * Caller must provide appropriate safe context. 56962306a36Sopenharmony_ci */ 57062306a36Sopenharmony_cistatic bool page_pool_recycle_in_cache(struct page *page, 57162306a36Sopenharmony_ci struct page_pool *pool) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { 57462306a36Sopenharmony_ci recycle_stat_inc(pool, cache_full); 57562306a36Sopenharmony_ci return false; 57662306a36Sopenharmony_ci } 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci /* Caller MUST have verified/know (page_ref_count(page) == 1) */ 57962306a36Sopenharmony_ci pool->alloc.cache[pool->alloc.count++] = page; 58062306a36Sopenharmony_ci recycle_stat_inc(pool, cached); 58162306a36Sopenharmony_ci return true; 58262306a36Sopenharmony_ci} 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci/* If the page refcnt == 1, this will try to recycle the page. 58562306a36Sopenharmony_ci * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for 58662306a36Sopenharmony_ci * the configured size min(dma_sync_size, pool->max_len). 58762306a36Sopenharmony_ci * If the page refcnt != 1, then the page will be returned to memory 58862306a36Sopenharmony_ci * subsystem. 58962306a36Sopenharmony_ci */ 59062306a36Sopenharmony_cistatic __always_inline struct page * 59162306a36Sopenharmony_ci__page_pool_put_page(struct page_pool *pool, struct page *page, 59262306a36Sopenharmony_ci unsigned int dma_sync_size, bool allow_direct) 59362306a36Sopenharmony_ci{ 59462306a36Sopenharmony_ci lockdep_assert_no_hardirq(); 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci /* This allocator is optimized for the XDP mode that uses 59762306a36Sopenharmony_ci * one-frame-per-page, but have fallbacks that act like the 59862306a36Sopenharmony_ci * regular page allocator APIs. 59962306a36Sopenharmony_ci * 60062306a36Sopenharmony_ci * refcnt == 1 means page_pool owns page, and can recycle it. 60162306a36Sopenharmony_ci * 60262306a36Sopenharmony_ci * page is NOT reusable when allocated when system is under 60362306a36Sopenharmony_ci * some pressure. (page_is_pfmemalloc) 60462306a36Sopenharmony_ci */ 60562306a36Sopenharmony_ci if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) { 60662306a36Sopenharmony_ci /* Read barrier done in page_ref_count / READ_ONCE */ 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 60962306a36Sopenharmony_ci page_pool_dma_sync_for_device(pool, page, 61062306a36Sopenharmony_ci dma_sync_size); 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci if (allow_direct && in_softirq() && 61362306a36Sopenharmony_ci page_pool_recycle_in_cache(page, pool)) 61462306a36Sopenharmony_ci return NULL; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci /* Page found as candidate for recycling */ 61762306a36Sopenharmony_ci return page; 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci /* Fallback/non-XDP mode: API user have elevated refcnt. 62062306a36Sopenharmony_ci * 62162306a36Sopenharmony_ci * Many drivers split up the page into fragments, and some 62262306a36Sopenharmony_ci * want to keep doing this to save memory and do refcnt based 62362306a36Sopenharmony_ci * recycling. Support this use case too, to ease drivers 62462306a36Sopenharmony_ci * switching between XDP/non-XDP. 62562306a36Sopenharmony_ci * 62662306a36Sopenharmony_ci * In-case page_pool maintains the DMA mapping, API user must 62762306a36Sopenharmony_ci * call page_pool_put_page once. In this elevated refcnt 62862306a36Sopenharmony_ci * case, the DMA is unmapped/released, as driver is likely 62962306a36Sopenharmony_ci * doing refcnt based recycle tricks, meaning another process 63062306a36Sopenharmony_ci * will be invoking put_page. 63162306a36Sopenharmony_ci */ 63262306a36Sopenharmony_ci recycle_stat_inc(pool, released_refcnt); 63362306a36Sopenharmony_ci page_pool_return_page(pool, page); 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci return NULL; 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_civoid page_pool_put_defragged_page(struct page_pool *pool, struct page *page, 63962306a36Sopenharmony_ci unsigned int dma_sync_size, bool allow_direct) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); 64262306a36Sopenharmony_ci if (page && !page_pool_recycle_in_ring(pool, page)) { 64362306a36Sopenharmony_ci /* Cache full, fallback to free pages */ 64462306a36Sopenharmony_ci recycle_stat_inc(pool, ring_full); 64562306a36Sopenharmony_ci page_pool_return_page(pool, page); 64662306a36Sopenharmony_ci } 64762306a36Sopenharmony_ci} 64862306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_put_defragged_page); 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci/** 65162306a36Sopenharmony_ci * page_pool_put_page_bulk() - release references on multiple pages 65262306a36Sopenharmony_ci * @pool: pool from which pages were allocated 65362306a36Sopenharmony_ci * @data: array holding page pointers 65462306a36Sopenharmony_ci * @count: number of pages in @data 65562306a36Sopenharmony_ci * 65662306a36Sopenharmony_ci * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring 65762306a36Sopenharmony_ci * producer lock. If the ptr_ring is full, page_pool_put_page_bulk() 65862306a36Sopenharmony_ci * will release leftover pages to the page allocator. 65962306a36Sopenharmony_ci * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx 66062306a36Sopenharmony_ci * completion loop for the XDP_REDIRECT use case. 66162306a36Sopenharmony_ci * 66262306a36Sopenharmony_ci * Please note the caller must not use data area after running 66362306a36Sopenharmony_ci * page_pool_put_page_bulk(), as this function overwrites it. 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_civoid page_pool_put_page_bulk(struct page_pool *pool, void **data, 66662306a36Sopenharmony_ci int count) 66762306a36Sopenharmony_ci{ 66862306a36Sopenharmony_ci int i, bulk_len = 0; 66962306a36Sopenharmony_ci bool in_softirq; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci for (i = 0; i < count; i++) { 67262306a36Sopenharmony_ci struct page *page = virt_to_head_page(data[i]); 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci /* It is not the last user for the page frag case */ 67562306a36Sopenharmony_ci if (!page_pool_is_last_frag(pool, page)) 67662306a36Sopenharmony_ci continue; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci page = __page_pool_put_page(pool, page, -1, false); 67962306a36Sopenharmony_ci /* Approved for bulk recycling in ptr_ring cache */ 68062306a36Sopenharmony_ci if (page) 68162306a36Sopenharmony_ci data[bulk_len++] = page; 68262306a36Sopenharmony_ci } 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci if (unlikely(!bulk_len)) 68562306a36Sopenharmony_ci return; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci /* Bulk producer into ptr_ring page_pool cache */ 68862306a36Sopenharmony_ci in_softirq = page_pool_producer_lock(pool); 68962306a36Sopenharmony_ci for (i = 0; i < bulk_len; i++) { 69062306a36Sopenharmony_ci if (__ptr_ring_produce(&pool->ring, data[i])) { 69162306a36Sopenharmony_ci /* ring full */ 69262306a36Sopenharmony_ci recycle_stat_inc(pool, ring_full); 69362306a36Sopenharmony_ci break; 69462306a36Sopenharmony_ci } 69562306a36Sopenharmony_ci } 69662306a36Sopenharmony_ci recycle_stat_add(pool, ring, i); 69762306a36Sopenharmony_ci page_pool_producer_unlock(pool, in_softirq); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci /* Hopefully all pages was return into ptr_ring */ 70062306a36Sopenharmony_ci if (likely(i == bulk_len)) 70162306a36Sopenharmony_ci return; 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci /* ptr_ring cache full, free remaining pages outside producer lock 70462306a36Sopenharmony_ci * since put_page() with refcnt == 1 can be an expensive operation 70562306a36Sopenharmony_ci */ 70662306a36Sopenharmony_ci for (; i < bulk_len; i++) 70762306a36Sopenharmony_ci page_pool_return_page(pool, data[i]); 70862306a36Sopenharmony_ci} 70962306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_put_page_bulk); 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_cistatic struct page *page_pool_drain_frag(struct page_pool *pool, 71262306a36Sopenharmony_ci struct page *page) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci long drain_count = BIAS_MAX - pool->frag_users; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci /* Some user is still using the page frag */ 71762306a36Sopenharmony_ci if (likely(page_pool_defrag_page(page, drain_count))) 71862306a36Sopenharmony_ci return NULL; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { 72162306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 72262306a36Sopenharmony_ci page_pool_dma_sync_for_device(pool, page, -1); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci return page; 72562306a36Sopenharmony_ci } 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci page_pool_return_page(pool, page); 72862306a36Sopenharmony_ci return NULL; 72962306a36Sopenharmony_ci} 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_cistatic void page_pool_free_frag(struct page_pool *pool) 73262306a36Sopenharmony_ci{ 73362306a36Sopenharmony_ci long drain_count = BIAS_MAX - pool->frag_users; 73462306a36Sopenharmony_ci struct page *page = pool->frag_page; 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci pool->frag_page = NULL; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci if (!page || page_pool_defrag_page(page, drain_count)) 73962306a36Sopenharmony_ci return; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci page_pool_return_page(pool, page); 74262306a36Sopenharmony_ci} 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_cistruct page *page_pool_alloc_frag(struct page_pool *pool, 74562306a36Sopenharmony_ci unsigned int *offset, 74662306a36Sopenharmony_ci unsigned int size, gfp_t gfp) 74762306a36Sopenharmony_ci{ 74862306a36Sopenharmony_ci unsigned int max_size = PAGE_SIZE << pool->p.order; 74962306a36Sopenharmony_ci struct page *page = pool->frag_page; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || 75262306a36Sopenharmony_ci size > max_size)) 75362306a36Sopenharmony_ci return NULL; 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci size = ALIGN(size, dma_get_cache_alignment()); 75662306a36Sopenharmony_ci *offset = pool->frag_offset; 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci if (page && *offset + size > max_size) { 75962306a36Sopenharmony_ci page = page_pool_drain_frag(pool, page); 76062306a36Sopenharmony_ci if (page) { 76162306a36Sopenharmony_ci alloc_stat_inc(pool, fast); 76262306a36Sopenharmony_ci goto frag_reset; 76362306a36Sopenharmony_ci } 76462306a36Sopenharmony_ci } 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci if (!page) { 76762306a36Sopenharmony_ci page = page_pool_alloc_pages(pool, gfp); 76862306a36Sopenharmony_ci if (unlikely(!page)) { 76962306a36Sopenharmony_ci pool->frag_page = NULL; 77062306a36Sopenharmony_ci return NULL; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci pool->frag_page = page; 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_cifrag_reset: 77662306a36Sopenharmony_ci pool->frag_users = 1; 77762306a36Sopenharmony_ci *offset = 0; 77862306a36Sopenharmony_ci pool->frag_offset = size; 77962306a36Sopenharmony_ci page_pool_fragment_page(page, BIAS_MAX); 78062306a36Sopenharmony_ci return page; 78162306a36Sopenharmony_ci } 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci pool->frag_users++; 78462306a36Sopenharmony_ci pool->frag_offset = *offset + size; 78562306a36Sopenharmony_ci alloc_stat_inc(pool, fast); 78662306a36Sopenharmony_ci return page; 78762306a36Sopenharmony_ci} 78862306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_alloc_frag); 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_cistatic void page_pool_empty_ring(struct page_pool *pool) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci struct page *page; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci /* Empty recycle ring */ 79562306a36Sopenharmony_ci while ((page = ptr_ring_consume_bh(&pool->ring))) { 79662306a36Sopenharmony_ci /* Verify the refcnt invariant of cached pages */ 79762306a36Sopenharmony_ci if (!(page_ref_count(page) == 1)) 79862306a36Sopenharmony_ci pr_crit("%s() page_pool refcnt %d violation\n", 79962306a36Sopenharmony_ci __func__, page_ref_count(page)); 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci page_pool_return_page(pool, page); 80262306a36Sopenharmony_ci } 80362306a36Sopenharmony_ci} 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_cistatic void page_pool_free(struct page_pool *pool) 80662306a36Sopenharmony_ci{ 80762306a36Sopenharmony_ci if (pool->disconnect) 80862306a36Sopenharmony_ci pool->disconnect(pool); 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci ptr_ring_cleanup(&pool->ring, NULL); 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci if (pool->p.flags & PP_FLAG_DMA_MAP) 81362306a36Sopenharmony_ci put_device(pool->p.dev); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci#ifdef CONFIG_PAGE_POOL_STATS 81662306a36Sopenharmony_ci free_percpu(pool->recycle_stats); 81762306a36Sopenharmony_ci#endif 81862306a36Sopenharmony_ci kfree(pool); 81962306a36Sopenharmony_ci} 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_cistatic void page_pool_empty_alloc_cache_once(struct page_pool *pool) 82262306a36Sopenharmony_ci{ 82362306a36Sopenharmony_ci struct page *page; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci if (pool->destroy_cnt) 82662306a36Sopenharmony_ci return; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci /* Empty alloc cache, assume caller made sure this is 82962306a36Sopenharmony_ci * no-longer in use, and page_pool_alloc_pages() cannot be 83062306a36Sopenharmony_ci * call concurrently. 83162306a36Sopenharmony_ci */ 83262306a36Sopenharmony_ci while (pool->alloc.count) { 83362306a36Sopenharmony_ci page = pool->alloc.cache[--pool->alloc.count]; 83462306a36Sopenharmony_ci page_pool_return_page(pool, page); 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci} 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_cistatic void page_pool_scrub(struct page_pool *pool) 83962306a36Sopenharmony_ci{ 84062306a36Sopenharmony_ci page_pool_empty_alloc_cache_once(pool); 84162306a36Sopenharmony_ci pool->destroy_cnt++; 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci /* No more consumers should exist, but producers could still 84462306a36Sopenharmony_ci * be in-flight. 84562306a36Sopenharmony_ci */ 84662306a36Sopenharmony_ci page_pool_empty_ring(pool); 84762306a36Sopenharmony_ci} 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_cistatic int page_pool_release(struct page_pool *pool) 85062306a36Sopenharmony_ci{ 85162306a36Sopenharmony_ci int inflight; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci page_pool_scrub(pool); 85462306a36Sopenharmony_ci inflight = page_pool_inflight(pool); 85562306a36Sopenharmony_ci if (!inflight) 85662306a36Sopenharmony_ci page_pool_free(pool); 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci return inflight; 85962306a36Sopenharmony_ci} 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_cistatic void page_pool_release_retry(struct work_struct *wq) 86262306a36Sopenharmony_ci{ 86362306a36Sopenharmony_ci struct delayed_work *dwq = to_delayed_work(wq); 86462306a36Sopenharmony_ci struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw); 86562306a36Sopenharmony_ci int inflight; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci inflight = page_pool_release(pool); 86862306a36Sopenharmony_ci if (!inflight) 86962306a36Sopenharmony_ci return; 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci /* Periodic warning */ 87262306a36Sopenharmony_ci if (time_after_eq(jiffies, pool->defer_warn)) { 87362306a36Sopenharmony_ci int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci pr_warn("%s() stalled pool shutdown %d inflight %d sec\n", 87662306a36Sopenharmony_ci __func__, inflight, sec); 87762306a36Sopenharmony_ci pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci /* Still not ready to be disconnected, retry later */ 88162306a36Sopenharmony_ci schedule_delayed_work(&pool->release_dw, DEFER_TIME); 88262306a36Sopenharmony_ci} 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_civoid page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), 88562306a36Sopenharmony_ci struct xdp_mem_info *mem) 88662306a36Sopenharmony_ci{ 88762306a36Sopenharmony_ci refcount_inc(&pool->user_cnt); 88862306a36Sopenharmony_ci pool->disconnect = disconnect; 88962306a36Sopenharmony_ci pool->xdp_mem_id = mem->id; 89062306a36Sopenharmony_ci} 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_civoid page_pool_unlink_napi(struct page_pool *pool) 89362306a36Sopenharmony_ci{ 89462306a36Sopenharmony_ci if (!pool->p.napi) 89562306a36Sopenharmony_ci return; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci /* To avoid races with recycling and additional barriers make sure 89862306a36Sopenharmony_ci * pool and NAPI are unlinked when NAPI is disabled. 89962306a36Sopenharmony_ci */ 90062306a36Sopenharmony_ci WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) || 90162306a36Sopenharmony_ci READ_ONCE(pool->p.napi->list_owner) != -1); 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci WRITE_ONCE(pool->p.napi, NULL); 90462306a36Sopenharmony_ci} 90562306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_unlink_napi); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_civoid page_pool_destroy(struct page_pool *pool) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci if (!pool) 91062306a36Sopenharmony_ci return; 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci if (!page_pool_put(pool)) 91362306a36Sopenharmony_ci return; 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci page_pool_unlink_napi(pool); 91662306a36Sopenharmony_ci page_pool_free_frag(pool); 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci if (!page_pool_release(pool)) 91962306a36Sopenharmony_ci return; 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci pool->defer_start = jiffies; 92262306a36Sopenharmony_ci pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry); 92562306a36Sopenharmony_ci schedule_delayed_work(&pool->release_dw, DEFER_TIME); 92662306a36Sopenharmony_ci} 92762306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_destroy); 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci/* Caller must provide appropriate safe context, e.g. NAPI. */ 93062306a36Sopenharmony_civoid page_pool_update_nid(struct page_pool *pool, int new_nid) 93162306a36Sopenharmony_ci{ 93262306a36Sopenharmony_ci struct page *page; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci trace_page_pool_update_nid(pool, new_nid); 93562306a36Sopenharmony_ci pool->p.nid = new_nid; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci /* Flush pool alloc cache, as refill will check NUMA node */ 93862306a36Sopenharmony_ci while (pool->alloc.count) { 93962306a36Sopenharmony_ci page = pool->alloc.cache[--pool->alloc.count]; 94062306a36Sopenharmony_ci page_pool_return_page(pool, page); 94162306a36Sopenharmony_ci } 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ciEXPORT_SYMBOL(page_pool_update_nid); 944