162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/mm/slab.c 462306a36Sopenharmony_ci * Written by Mark Hemment, 1996/97. 562306a36Sopenharmony_ci * (markhe@nextd.demon.co.uk) 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Major cleanup, different bufctl logic, per-cpu arrays 1062306a36Sopenharmony_ci * (c) 2000 Manfred Spraul 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Cleanup, make the head arrays unconditional, preparation for NUMA 1362306a36Sopenharmony_ci * (c) 2002 Manfred Spraul 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * An implementation of the Slab Allocator as described in outline in; 1662306a36Sopenharmony_ci * UNIX Internals: The New Frontiers by Uresh Vahalia 1762306a36Sopenharmony_ci * Pub: Prentice Hall ISBN 0-13-101908-2 1862306a36Sopenharmony_ci * or with a little more detail in; 1962306a36Sopenharmony_ci * The Slab Allocator: An Object-Caching Kernel Memory Allocator 2062306a36Sopenharmony_ci * Jeff Bonwick (Sun Microsystems). 2162306a36Sopenharmony_ci * Presented at: USENIX Summer 1994 Technical Conference 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * The memory is organized in caches, one cache for each object type. 2462306a36Sopenharmony_ci * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) 2562306a36Sopenharmony_ci * Each cache consists out of many slabs (they are small (usually one 2662306a36Sopenharmony_ci * page long) and always contiguous), and each slab contains multiple 2762306a36Sopenharmony_ci * initialized objects. 2862306a36Sopenharmony_ci * 2962306a36Sopenharmony_ci * This means, that your constructor is used only for newly allocated 3062306a36Sopenharmony_ci * slabs and you must pass objects with the same initializations to 3162306a36Sopenharmony_ci * kmem_cache_free. 
3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, 3462306a36Sopenharmony_ci * normal). If you need a special memory type, then must create a new 3562306a36Sopenharmony_ci * cache for that memory type. 3662306a36Sopenharmony_ci * 3762306a36Sopenharmony_ci * In order to reduce fragmentation, the slabs are sorted in 3 groups: 3862306a36Sopenharmony_ci * full slabs with 0 free objects 3962306a36Sopenharmony_ci * partial slabs 4062306a36Sopenharmony_ci * empty slabs with no allocated objects 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * If partial slabs exist, then new allocations come from these slabs, 4362306a36Sopenharmony_ci * otherwise from empty slabs or new slabs are allocated. 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache 4662306a36Sopenharmony_ci * during kmem_cache_destroy(). The caller must prevent concurrent allocs. 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * Each cache has a short per-cpu head array, most allocs 4962306a36Sopenharmony_ci * and frees go into that array, and if that array overflows, then 1/2 5062306a36Sopenharmony_ci * of the entries in the array are given back into the global cache. 5162306a36Sopenharmony_ci * The head array is strictly LIFO and should improve the cache hit rates. 5262306a36Sopenharmony_ci * On SMP, it additionally reduces the spinlock operations. 5362306a36Sopenharmony_ci * 5462306a36Sopenharmony_ci * The c_cpuarray may not be read with enabled local interrupts - 5562306a36Sopenharmony_ci * it's changed with a smp_call_function(). 5662306a36Sopenharmony_ci * 5762306a36Sopenharmony_ci * SMP synchronization: 5862306a36Sopenharmony_ci * constructors and destructors are called without any locking. 5962306a36Sopenharmony_ci * Several members in struct kmem_cache and struct slab never change, they 6062306a36Sopenharmony_ci * are accessed without any locking. 
6162306a36Sopenharmony_ci * The per-cpu arrays are never accessed from the wrong cpu, no locking, 6262306a36Sopenharmony_ci * and local interrupts are disabled so slab code is preempt-safe. 6362306a36Sopenharmony_ci * The non-constant members are protected with a per-cache irq spinlock. 6462306a36Sopenharmony_ci * 6562306a36Sopenharmony_ci * Many thanks to Mark Hemment, who wrote another per-cpu slab patch 6662306a36Sopenharmony_ci * in 2000 - many ideas in the current implementation are derived from 6762306a36Sopenharmony_ci * his patch. 6862306a36Sopenharmony_ci * 6962306a36Sopenharmony_ci * Further notes from the original documentation: 7062306a36Sopenharmony_ci * 7162306a36Sopenharmony_ci * 11 April '97. Started multi-threading - markhe 7262306a36Sopenharmony_ci * The global cache-chain is protected by the mutex 'slab_mutex'. 7362306a36Sopenharmony_ci * The sem is only needed when accessing/extending the cache-chain, which 7462306a36Sopenharmony_ci * can never happen inside an interrupt (kmem_cache_create(), 7562306a36Sopenharmony_ci * kmem_cache_shrink() and kmem_cache_reap()). 7662306a36Sopenharmony_ci * 7762306a36Sopenharmony_ci * At present, each engine can be growing a cache. This should be blocked. 7862306a36Sopenharmony_ci * 7962306a36Sopenharmony_ci * 15 March 2005. NUMA slab allocator. 8062306a36Sopenharmony_ci * Shai Fultheim <shai@scalex86.org>. 8162306a36Sopenharmony_ci * Shobhit Dayal <shobhit@calsoftinc.com> 8262306a36Sopenharmony_ci * Alok N Kataria <alokk@calsoftinc.com> 8362306a36Sopenharmony_ci * Christoph Lameter <christoph@lameter.com> 8462306a36Sopenharmony_ci * 8562306a36Sopenharmony_ci * Modified the slab allocator to be node aware on NUMA systems. 8662306a36Sopenharmony_ci * Each node has its own list of partial, free and full slabs. 8762306a36Sopenharmony_ci * All object allocations for a node occur from node specific slab lists. 
8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci#include <linux/slab.h> 9162306a36Sopenharmony_ci#include <linux/mm.h> 9262306a36Sopenharmony_ci#include <linux/poison.h> 9362306a36Sopenharmony_ci#include <linux/swap.h> 9462306a36Sopenharmony_ci#include <linux/cache.h> 9562306a36Sopenharmony_ci#include <linux/interrupt.h> 9662306a36Sopenharmony_ci#include <linux/init.h> 9762306a36Sopenharmony_ci#include <linux/compiler.h> 9862306a36Sopenharmony_ci#include <linux/cpuset.h> 9962306a36Sopenharmony_ci#include <linux/proc_fs.h> 10062306a36Sopenharmony_ci#include <linux/seq_file.h> 10162306a36Sopenharmony_ci#include <linux/notifier.h> 10262306a36Sopenharmony_ci#include <linux/kallsyms.h> 10362306a36Sopenharmony_ci#include <linux/kfence.h> 10462306a36Sopenharmony_ci#include <linux/cpu.h> 10562306a36Sopenharmony_ci#include <linux/sysctl.h> 10662306a36Sopenharmony_ci#include <linux/module.h> 10762306a36Sopenharmony_ci#include <linux/rcupdate.h> 10862306a36Sopenharmony_ci#include <linux/string.h> 10962306a36Sopenharmony_ci#include <linux/uaccess.h> 11062306a36Sopenharmony_ci#include <linux/nodemask.h> 11162306a36Sopenharmony_ci#include <linux/kmemleak.h> 11262306a36Sopenharmony_ci#include <linux/mempolicy.h> 11362306a36Sopenharmony_ci#include <linux/mutex.h> 11462306a36Sopenharmony_ci#include <linux/fault-inject.h> 11562306a36Sopenharmony_ci#include <linux/rtmutex.h> 11662306a36Sopenharmony_ci#include <linux/reciprocal_div.h> 11762306a36Sopenharmony_ci#include <linux/debugobjects.h> 11862306a36Sopenharmony_ci#include <linux/memory.h> 11962306a36Sopenharmony_ci#include <linux/prefetch.h> 12062306a36Sopenharmony_ci#include <linux/sched/task_stack.h> 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci#include <net/sock.h> 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci#include <asm/cacheflush.h> 12562306a36Sopenharmony_ci#include <asm/tlbflush.h> 12662306a36Sopenharmony_ci#include <asm/page.h> 12762306a36Sopenharmony_ci 
#include <trace/events/kmem.h>

#include "internal.h"

#include "slab.h"

/*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 *
 * All three switches follow CONFIG_DEBUG_SLAB: they are 1 when it is
 * defined and 0 otherwise.  They are always defined, so they are tested
 * with "#if", not "#ifdef".
 */

#ifdef CONFIG_DEBUG_SLAB
#define DEBUG 1
#define STATS 1
#define FORCED_DEBUG 1
#else
#define DEBUG 0
#define STATS 0
#define FORCED_DEBUG 0
#endif

/* Shouldn't this be in a header file somewhere? */
/* BYTES_PER_WORD is the size of a pointer. */
#define BYTES_PER_WORD sizeof(void *)
/*
 * REDZONE_ALIGN is the larger of the pointer size and the alignment of
 * unsigned long long, the type the dbg_redzone*() accessors return.
 */
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))

/* Fall back to SLAB_HWCACHE_ALIGN when nothing defined ARCH_KMALLOC_FLAGS. */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

/*
 * Evaluates to 1 when (PAGE_SIZE >> BITS_PER_BYTE) <= SLAB_OBJ_MIN_SIZE and
 * to 0 otherwise.  It selects the width of freelist_idx_t below.
 */
#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)

/* Per-object freelist index: one byte if FREELIST_BYTE_INDEX, else two. */
#if FREELIST_BYTE_INDEX
typedef unsigned char freelist_idx_t;
#else
typedef unsigned short freelist_idx_t;
#endif

/* Largest value a freelist_idx_t can hold (all bits set). */
#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 * Fields:
 * @avail:	number of object pointers currently stored in @entry
 *		(__free_one() stores at entry[avail] and then increments it).
 * @limit:	upper bound for @avail; transfer_objects() never moves more
 *		than limit - avail entries into a cache.
 * @batchcount:	set by init_arraycache().  NOTE(review): presumably the
 *		number of objects moved per refill/flush - confirm against
 *		the users outside this excerpt.
 * @touched:	set to 0 by init_arraycache().  NOTE(review): looks like a
 *		"recently used" marker - confirm.
 * @entry:	flexible array of object pointers; the storage is allocated
 *		together with the structure (see alloc_arraycache()).
 */
struct array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};

/*
 * An array_cache paired with its own spinlock.  @ac ends in a flexible
 * array member, so it has to remain the last member of this structure.
 */
struct alien_cache {
	spinlock_t lock;
	struct array_cache ac;
};

/*
 * Need this for bootstrapping a per node allocator.
20462306a36Sopenharmony_ci */ 20562306a36Sopenharmony_ci#define NUM_INIT_LISTS (2 * MAX_NUMNODES) 20662306a36Sopenharmony_cistatic struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS]; 20762306a36Sopenharmony_ci#define CACHE_CACHE 0 20862306a36Sopenharmony_ci#define SIZE_NODE (MAX_NUMNODES) 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic int drain_freelist(struct kmem_cache *cache, 21162306a36Sopenharmony_ci struct kmem_cache_node *n, int tofree); 21262306a36Sopenharmony_cistatic void free_block(struct kmem_cache *cachep, void **objpp, int len, 21362306a36Sopenharmony_ci int node, struct list_head *list); 21462306a36Sopenharmony_cistatic void slabs_destroy(struct kmem_cache *cachep, struct list_head *list); 21562306a36Sopenharmony_cistatic int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); 21662306a36Sopenharmony_cistatic void cache_reap(struct work_struct *unused); 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_cistatic inline void fixup_objfreelist_debug(struct kmem_cache *cachep, 21962306a36Sopenharmony_ci void **list); 22062306a36Sopenharmony_cistatic inline void fixup_slab_list(struct kmem_cache *cachep, 22162306a36Sopenharmony_ci struct kmem_cache_node *n, struct slab *slab, 22262306a36Sopenharmony_ci void **list); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node)) 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic void kmem_cache_node_init(struct kmem_cache_node *parent) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci INIT_LIST_HEAD(&parent->slabs_full); 22962306a36Sopenharmony_ci INIT_LIST_HEAD(&parent->slabs_partial); 23062306a36Sopenharmony_ci INIT_LIST_HEAD(&parent->slabs_free); 23162306a36Sopenharmony_ci parent->total_slabs = 0; 23262306a36Sopenharmony_ci parent->free_slabs = 0; 23362306a36Sopenharmony_ci parent->shared = NULL; 23462306a36Sopenharmony_ci parent->alien = NULL; 23562306a36Sopenharmony_ci parent->colour_next = 0; 
23662306a36Sopenharmony_ci raw_spin_lock_init(&parent->list_lock); 23762306a36Sopenharmony_ci parent->free_objects = 0; 23862306a36Sopenharmony_ci parent->free_touched = 0; 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci#define MAKE_LIST(cachep, listp, slab, nodeid) \ 24262306a36Sopenharmony_ci do { \ 24362306a36Sopenharmony_ci INIT_LIST_HEAD(listp); \ 24462306a36Sopenharmony_ci list_splice(&get_node(cachep, nodeid)->slab, listp); \ 24562306a36Sopenharmony_ci } while (0) 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 24862306a36Sopenharmony_ci do { \ 24962306a36Sopenharmony_ci MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ 25062306a36Sopenharmony_ci MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ 25162306a36Sopenharmony_ci MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ 25262306a36Sopenharmony_ci } while (0) 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci#define CFLGS_OBJFREELIST_SLAB ((slab_flags_t __force)0x40000000U) 25562306a36Sopenharmony_ci#define CFLGS_OFF_SLAB ((slab_flags_t __force)0x80000000U) 25662306a36Sopenharmony_ci#define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB) 25762306a36Sopenharmony_ci#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci#define BATCHREFILL_LIMIT 16 26062306a36Sopenharmony_ci/* 26162306a36Sopenharmony_ci * Optimization question: fewer reaps means less probability for unnecessary 26262306a36Sopenharmony_ci * cpucache drain/refill cycles. 26362306a36Sopenharmony_ci * 26462306a36Sopenharmony_ci * OTOH the cpuarrays can contain lots of objects, 26562306a36Sopenharmony_ci * which could lock up otherwise freeable slabs. 
26662306a36Sopenharmony_ci */ 26762306a36Sopenharmony_ci#define REAPTIMEOUT_AC (2*HZ) 26862306a36Sopenharmony_ci#define REAPTIMEOUT_NODE (4*HZ) 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci#if STATS 27162306a36Sopenharmony_ci#define STATS_INC_ACTIVE(x) ((x)->num_active++) 27262306a36Sopenharmony_ci#define STATS_DEC_ACTIVE(x) ((x)->num_active--) 27362306a36Sopenharmony_ci#define STATS_INC_ALLOCED(x) ((x)->num_allocations++) 27462306a36Sopenharmony_ci#define STATS_INC_GROWN(x) ((x)->grown++) 27562306a36Sopenharmony_ci#define STATS_ADD_REAPED(x, y) ((x)->reaped += (y)) 27662306a36Sopenharmony_ci#define STATS_SET_HIGH(x) \ 27762306a36Sopenharmony_ci do { \ 27862306a36Sopenharmony_ci if ((x)->num_active > (x)->high_mark) \ 27962306a36Sopenharmony_ci (x)->high_mark = (x)->num_active; \ 28062306a36Sopenharmony_ci } while (0) 28162306a36Sopenharmony_ci#define STATS_INC_ERR(x) ((x)->errors++) 28262306a36Sopenharmony_ci#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) 28362306a36Sopenharmony_ci#define STATS_INC_NODEFREES(x) ((x)->node_frees++) 28462306a36Sopenharmony_ci#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++) 28562306a36Sopenharmony_ci#define STATS_SET_FREEABLE(x, i) \ 28662306a36Sopenharmony_ci do { \ 28762306a36Sopenharmony_ci if ((x)->max_freeable < i) \ 28862306a36Sopenharmony_ci (x)->max_freeable = i; \ 28962306a36Sopenharmony_ci } while (0) 29062306a36Sopenharmony_ci#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) 29162306a36Sopenharmony_ci#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) 29262306a36Sopenharmony_ci#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) 29362306a36Sopenharmony_ci#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) 29462306a36Sopenharmony_ci#else 29562306a36Sopenharmony_ci#define STATS_INC_ACTIVE(x) do { } while (0) 29662306a36Sopenharmony_ci#define STATS_DEC_ACTIVE(x) do { } while (0) 29762306a36Sopenharmony_ci#define STATS_INC_ALLOCED(x) do { } while (0) 29862306a36Sopenharmony_ci#define 
STATS_INC_GROWN(x) do { } while (0) 29962306a36Sopenharmony_ci#define STATS_ADD_REAPED(x, y) do { (void)(y); } while (0) 30062306a36Sopenharmony_ci#define STATS_SET_HIGH(x) do { } while (0) 30162306a36Sopenharmony_ci#define STATS_INC_ERR(x) do { } while (0) 30262306a36Sopenharmony_ci#define STATS_INC_NODEALLOCS(x) do { } while (0) 30362306a36Sopenharmony_ci#define STATS_INC_NODEFREES(x) do { } while (0) 30462306a36Sopenharmony_ci#define STATS_INC_ACOVERFLOW(x) do { } while (0) 30562306a36Sopenharmony_ci#define STATS_SET_FREEABLE(x, i) do { } while (0) 30662306a36Sopenharmony_ci#define STATS_INC_ALLOCHIT(x) do { } while (0) 30762306a36Sopenharmony_ci#define STATS_INC_ALLOCMISS(x) do { } while (0) 30862306a36Sopenharmony_ci#define STATS_INC_FREEHIT(x) do { } while (0) 30962306a36Sopenharmony_ci#define STATS_INC_FREEMISS(x) do { } while (0) 31062306a36Sopenharmony_ci#endif 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci#if DEBUG 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci/* 31562306a36Sopenharmony_ci * memory layout of objects: 31662306a36Sopenharmony_ci * 0 : objp 31762306a36Sopenharmony_ci * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that 31862306a36Sopenharmony_ci * the end of an object is aligned with the end of the real 31962306a36Sopenharmony_ci * allocation. Catches writes behind the end of the allocation. 32062306a36Sopenharmony_ci * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: 32162306a36Sopenharmony_ci * redzone word. 32262306a36Sopenharmony_ci * cachep->obj_offset: The real object. 
32362306a36Sopenharmony_ci * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 32462306a36Sopenharmony_ci * cachep->size - 1* BYTES_PER_WORD: last caller address 32562306a36Sopenharmony_ci * [BYTES_PER_WORD long] 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_cistatic int obj_offset(struct kmem_cache *cachep) 32862306a36Sopenharmony_ci{ 32962306a36Sopenharmony_ci return cachep->obj_offset; 33062306a36Sopenharmony_ci} 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_cistatic unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 33562306a36Sopenharmony_ci return (unsigned long long *) (objp + obj_offset(cachep) - 33662306a36Sopenharmony_ci sizeof(unsigned long long)); 33762306a36Sopenharmony_ci} 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_cistatic unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 34262306a36Sopenharmony_ci if (cachep->flags & SLAB_STORE_USER) 34362306a36Sopenharmony_ci return (unsigned long long *)(objp + cachep->size - 34462306a36Sopenharmony_ci sizeof(unsigned long long) - 34562306a36Sopenharmony_ci REDZONE_ALIGN); 34662306a36Sopenharmony_ci return (unsigned long long *) (objp + cachep->size - 34762306a36Sopenharmony_ci sizeof(unsigned long long)); 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cistatic void **dbg_userword(struct kmem_cache *cachep, void *objp) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci BUG_ON(!(cachep->flags & SLAB_STORE_USER)); 35362306a36Sopenharmony_ci return (void **)(objp + cachep->size - BYTES_PER_WORD); 35462306a36Sopenharmony_ci} 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci#else 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci#define obj_offset(x) 0 35962306a36Sopenharmony_ci#define 
dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 36062306a36Sopenharmony_ci#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 36162306a36Sopenharmony_ci#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci#endif 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci/* 36662306a36Sopenharmony_ci * Do not go above this order unless 0 objects fit into the slab or 36762306a36Sopenharmony_ci * overridden on the command line. 36862306a36Sopenharmony_ci */ 36962306a36Sopenharmony_ci#define SLAB_MAX_ORDER_HI 1 37062306a36Sopenharmony_ci#define SLAB_MAX_ORDER_LO 0 37162306a36Sopenharmony_cistatic int slab_max_order = SLAB_MAX_ORDER_LO; 37262306a36Sopenharmony_cistatic bool slab_max_order_set __initdata; 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_cistatic inline void *index_to_obj(struct kmem_cache *cache, 37562306a36Sopenharmony_ci const struct slab *slab, unsigned int idx) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci return slab->s_mem + cache->size * idx; 37862306a36Sopenharmony_ci} 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci#define BOOT_CPUCACHE_ENTRIES 1 38162306a36Sopenharmony_ci/* internal cache of cache description objs */ 38262306a36Sopenharmony_cistatic struct kmem_cache kmem_cache_boot = { 38362306a36Sopenharmony_ci .batchcount = 1, 38462306a36Sopenharmony_ci .limit = BOOT_CPUCACHE_ENTRIES, 38562306a36Sopenharmony_ci .shared = 1, 38662306a36Sopenharmony_ci .size = sizeof(struct kmem_cache), 38762306a36Sopenharmony_ci .name = "kmem_cache", 38862306a36Sopenharmony_ci}; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_cistatic inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci return this_cpu_ptr(cachep->cpu_cache); 39562306a36Sopenharmony_ci} 
39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci/* 39862306a36Sopenharmony_ci * Calculate the number of objects and left-over bytes for a given buffer size. 39962306a36Sopenharmony_ci */ 40062306a36Sopenharmony_cistatic unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size, 40162306a36Sopenharmony_ci slab_flags_t flags, size_t *left_over) 40262306a36Sopenharmony_ci{ 40362306a36Sopenharmony_ci unsigned int num; 40462306a36Sopenharmony_ci size_t slab_size = PAGE_SIZE << gfporder; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci /* 40762306a36Sopenharmony_ci * The slab management structure can be either off the slab or 40862306a36Sopenharmony_ci * on it. For the latter case, the memory allocated for a 40962306a36Sopenharmony_ci * slab is used for: 41062306a36Sopenharmony_ci * 41162306a36Sopenharmony_ci * - @buffer_size bytes for each object 41262306a36Sopenharmony_ci * - One freelist_idx_t for each object 41362306a36Sopenharmony_ci * 41462306a36Sopenharmony_ci * We don't need to consider alignment of freelist because 41562306a36Sopenharmony_ci * freelist will be at the end of slab page. The objects will be 41662306a36Sopenharmony_ci * at the correct alignment. 41762306a36Sopenharmony_ci * 41862306a36Sopenharmony_ci * If the slab management structure is off the slab, then the 41962306a36Sopenharmony_ci * alignment will already be calculated into the size. Because 42062306a36Sopenharmony_ci * the slabs are all pages aligned, the objects will be at the 42162306a36Sopenharmony_ci * correct alignment when allocated. 
42262306a36Sopenharmony_ci */ 42362306a36Sopenharmony_ci if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) { 42462306a36Sopenharmony_ci num = slab_size / buffer_size; 42562306a36Sopenharmony_ci *left_over = slab_size % buffer_size; 42662306a36Sopenharmony_ci } else { 42762306a36Sopenharmony_ci num = slab_size / (buffer_size + sizeof(freelist_idx_t)); 42862306a36Sopenharmony_ci *left_over = slab_size % 42962306a36Sopenharmony_ci (buffer_size + sizeof(freelist_idx_t)); 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci return num; 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci#if DEBUG 43662306a36Sopenharmony_ci#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg) 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_cistatic void __slab_error(const char *function, struct kmem_cache *cachep, 43962306a36Sopenharmony_ci char *msg) 44062306a36Sopenharmony_ci{ 44162306a36Sopenharmony_ci pr_err("slab error in %s(): cache `%s': %s\n", 44262306a36Sopenharmony_ci function, cachep->name, msg); 44362306a36Sopenharmony_ci dump_stack(); 44462306a36Sopenharmony_ci add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci#endif 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci/* 44962306a36Sopenharmony_ci * By default on NUMA we use alien caches to stage the freeing of 45062306a36Sopenharmony_ci * objects allocated from other nodes. 
This causes massive memory 45162306a36Sopenharmony_ci * inefficiencies when using fake NUMA setup to split memory into a 45262306a36Sopenharmony_ci * large number of small nodes, so it can be disabled on the command 45362306a36Sopenharmony_ci * line 45462306a36Sopenharmony_ci */ 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_cistatic int use_alien_caches __read_mostly = 1; 45762306a36Sopenharmony_cistatic int __init noaliencache_setup(char *s) 45862306a36Sopenharmony_ci{ 45962306a36Sopenharmony_ci use_alien_caches = 0; 46062306a36Sopenharmony_ci return 1; 46162306a36Sopenharmony_ci} 46262306a36Sopenharmony_ci__setup("noaliencache", noaliencache_setup); 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_cistatic int __init slab_max_order_setup(char *str) 46562306a36Sopenharmony_ci{ 46662306a36Sopenharmony_ci get_option(&str, &slab_max_order); 46762306a36Sopenharmony_ci slab_max_order = slab_max_order < 0 ? 0 : 46862306a36Sopenharmony_ci min(slab_max_order, MAX_ORDER); 46962306a36Sopenharmony_ci slab_max_order_set = true; 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci return 1; 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci__setup("slab_max_order=", slab_max_order_setup); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci#ifdef CONFIG_NUMA 47662306a36Sopenharmony_ci/* 47762306a36Sopenharmony_ci * Special reaping functions for NUMA systems called from cache_reap(). 47862306a36Sopenharmony_ci * These take care of doing round robin flushing of alien caches (containing 47962306a36Sopenharmony_ci * objects freed on different nodes from which they were allocated) and the 48062306a36Sopenharmony_ci * flushing of remote pcps by calling drain_node_pages. 
48162306a36Sopenharmony_ci */ 48262306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, slab_reap_node); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_cistatic void init_reap_node(int cpu) 48562306a36Sopenharmony_ci{ 48662306a36Sopenharmony_ci per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu), 48762306a36Sopenharmony_ci node_online_map); 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic void next_reap_node(void) 49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci int node = __this_cpu_read(slab_reap_node); 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci node = next_node_in(node, node_online_map); 49562306a36Sopenharmony_ci __this_cpu_write(slab_reap_node, node); 49662306a36Sopenharmony_ci} 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci#else 49962306a36Sopenharmony_ci#define init_reap_node(cpu) do { } while (0) 50062306a36Sopenharmony_ci#define next_reap_node(void) do { } while (0) 50162306a36Sopenharmony_ci#endif 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci/* 50462306a36Sopenharmony_ci * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz 50562306a36Sopenharmony_ci * via the workqueue/eventd. 50662306a36Sopenharmony_ci * Add the CPU number into the expiration time to minimize the possibility of 50762306a36Sopenharmony_ci * the CPUs getting into lockstep and contending for the global cache chain 50862306a36Sopenharmony_ci * lock. 
50962306a36Sopenharmony_ci */ 51062306a36Sopenharmony_cistatic void start_cpu_timer(int cpu) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu); 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci if (reap_work->work.func == NULL) { 51562306a36Sopenharmony_ci init_reap_node(cpu); 51662306a36Sopenharmony_ci INIT_DEFERRABLE_WORK(reap_work, cache_reap); 51762306a36Sopenharmony_ci schedule_delayed_work_on(cpu, reap_work, 51862306a36Sopenharmony_ci __round_jiffies_relative(HZ, cpu)); 51962306a36Sopenharmony_ci } 52062306a36Sopenharmony_ci} 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_cistatic void init_arraycache(struct array_cache *ac, int limit, int batch) 52362306a36Sopenharmony_ci{ 52462306a36Sopenharmony_ci if (ac) { 52562306a36Sopenharmony_ci ac->avail = 0; 52662306a36Sopenharmony_ci ac->limit = limit; 52762306a36Sopenharmony_ci ac->batchcount = batch; 52862306a36Sopenharmony_ci ac->touched = 0; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci} 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_cistatic struct array_cache *alloc_arraycache(int node, int entries, 53362306a36Sopenharmony_ci int batchcount, gfp_t gfp) 53462306a36Sopenharmony_ci{ 53562306a36Sopenharmony_ci size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache); 53662306a36Sopenharmony_ci struct array_cache *ac = NULL; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci ac = kmalloc_node(memsize, gfp, node); 53962306a36Sopenharmony_ci /* 54062306a36Sopenharmony_ci * The array_cache structures contain pointers to free object. 54162306a36Sopenharmony_ci * However, when such objects are allocated or transferred to another 54262306a36Sopenharmony_ci * cache the pointers are not cleared and they could be counted as 54362306a36Sopenharmony_ci * valid references during a kmemleak scan. Therefore, kmemleak must 54462306a36Sopenharmony_ci * not scan such objects. 
54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_ci kmemleak_no_scan(ac); 54762306a36Sopenharmony_ci init_arraycache(ac, entries, batchcount); 54862306a36Sopenharmony_ci return ac; 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_cistatic noinline void cache_free_pfmemalloc(struct kmem_cache *cachep, 55262306a36Sopenharmony_ci struct slab *slab, void *objp) 55362306a36Sopenharmony_ci{ 55462306a36Sopenharmony_ci struct kmem_cache_node *n; 55562306a36Sopenharmony_ci int slab_node; 55662306a36Sopenharmony_ci LIST_HEAD(list); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci slab_node = slab_nid(slab); 55962306a36Sopenharmony_ci n = get_node(cachep, slab_node); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 56262306a36Sopenharmony_ci free_block(cachep, &objp, 1, slab_node, &list); 56362306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci slabs_destroy(cachep, &list); 56662306a36Sopenharmony_ci} 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci/* 56962306a36Sopenharmony_ci * Transfer objects in one arraycache to another. 57062306a36Sopenharmony_ci * Locking must be handled by the caller. 57162306a36Sopenharmony_ci * 57262306a36Sopenharmony_ci * Return the number of entries transferred. 
57362306a36Sopenharmony_ci */ 57462306a36Sopenharmony_cistatic int transfer_objects(struct array_cache *to, 57562306a36Sopenharmony_ci struct array_cache *from, unsigned int max) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci /* Figure out how many entries to transfer */ 57862306a36Sopenharmony_ci int nr = min3(from->avail, max, to->limit - to->avail); 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (!nr) 58162306a36Sopenharmony_ci return 0; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci memcpy(to->entry + to->avail, from->entry + from->avail - nr, 58462306a36Sopenharmony_ci sizeof(void *) *nr); 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci from->avail -= nr; 58762306a36Sopenharmony_ci to->avail += nr; 58862306a36Sopenharmony_ci return nr; 58962306a36Sopenharmony_ci} 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci/* &alien->lock must be held by alien callers. */ 59262306a36Sopenharmony_cistatic __always_inline void __free_one(struct array_cache *ac, void *objp) 59362306a36Sopenharmony_ci{ 59462306a36Sopenharmony_ci /* Avoid trivial double-free. 
*/ 59562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) && 59662306a36Sopenharmony_ci WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp)) 59762306a36Sopenharmony_ci return; 59862306a36Sopenharmony_ci ac->entry[ac->avail++] = objp; 59962306a36Sopenharmony_ci} 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci#ifndef CONFIG_NUMA 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci#define drain_alien_cache(cachep, alien) do { } while (0) 60462306a36Sopenharmony_ci#define reap_alien(cachep, n) do { } while (0) 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_cistatic inline struct alien_cache **alloc_alien_cache(int node, 60762306a36Sopenharmony_ci int limit, gfp_t gfp) 60862306a36Sopenharmony_ci{ 60962306a36Sopenharmony_ci return NULL; 61062306a36Sopenharmony_ci} 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_cistatic inline void free_alien_cache(struct alien_cache **ac_ptr) 61362306a36Sopenharmony_ci{ 61462306a36Sopenharmony_ci} 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_cistatic inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 61762306a36Sopenharmony_ci{ 61862306a36Sopenharmony_ci return 0; 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_cistatic inline gfp_t gfp_exact_node(gfp_t flags) 62262306a36Sopenharmony_ci{ 62362306a36Sopenharmony_ci return flags & ~__GFP_NOFAIL; 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci#else /* CONFIG_NUMA */ 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_cistatic struct alien_cache *__alloc_alien_cache(int node, int entries, 62962306a36Sopenharmony_ci int batch, gfp_t gfp) 63062306a36Sopenharmony_ci{ 63162306a36Sopenharmony_ci size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache); 63262306a36Sopenharmony_ci struct alien_cache *alc = NULL; 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci alc = kmalloc_node(memsize, gfp, node); 63562306a36Sopenharmony_ci if (alc) { 
63662306a36Sopenharmony_ci kmemleak_no_scan(alc); 63762306a36Sopenharmony_ci init_arraycache(&alc->ac, entries, batch); 63862306a36Sopenharmony_ci spin_lock_init(&alc->lock); 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci return alc; 64162306a36Sopenharmony_ci} 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_cistatic struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 64462306a36Sopenharmony_ci{ 64562306a36Sopenharmony_ci struct alien_cache **alc_ptr; 64662306a36Sopenharmony_ci int i; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci if (limit > 1) 64962306a36Sopenharmony_ci limit = 12; 65062306a36Sopenharmony_ci alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node); 65162306a36Sopenharmony_ci if (!alc_ptr) 65262306a36Sopenharmony_ci return NULL; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci for_each_node(i) { 65562306a36Sopenharmony_ci if (i == node || !node_online(i)) 65662306a36Sopenharmony_ci continue; 65762306a36Sopenharmony_ci alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp); 65862306a36Sopenharmony_ci if (!alc_ptr[i]) { 65962306a36Sopenharmony_ci for (i--; i >= 0; i--) 66062306a36Sopenharmony_ci kfree(alc_ptr[i]); 66162306a36Sopenharmony_ci kfree(alc_ptr); 66262306a36Sopenharmony_ci return NULL; 66362306a36Sopenharmony_ci } 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci return alc_ptr; 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_cistatic void free_alien_cache(struct alien_cache **alc_ptr) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci int i; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci if (!alc_ptr) 67362306a36Sopenharmony_ci return; 67462306a36Sopenharmony_ci for_each_node(i) 67562306a36Sopenharmony_ci kfree(alc_ptr[i]); 67662306a36Sopenharmony_ci kfree(alc_ptr); 67762306a36Sopenharmony_ci} 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_cistatic void __drain_alien_cache(struct kmem_cache *cachep, 68062306a36Sopenharmony_ci 
struct array_cache *ac, int node, 68162306a36Sopenharmony_ci struct list_head *list) 68262306a36Sopenharmony_ci{ 68362306a36Sopenharmony_ci struct kmem_cache_node *n = get_node(cachep, node); 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci if (ac->avail) { 68662306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 68762306a36Sopenharmony_ci /* 68862306a36Sopenharmony_ci * Stuff objects into the remote nodes shared array first. 68962306a36Sopenharmony_ci * That way we could avoid the overhead of putting the objects 69062306a36Sopenharmony_ci * into the free lists and getting them back later. 69162306a36Sopenharmony_ci */ 69262306a36Sopenharmony_ci if (n->shared) 69362306a36Sopenharmony_ci transfer_objects(n->shared, ac, ac->limit); 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci free_block(cachep, ac->entry, ac->avail, node, list); 69662306a36Sopenharmony_ci ac->avail = 0; 69762306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 69862306a36Sopenharmony_ci } 69962306a36Sopenharmony_ci} 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci/* 70262306a36Sopenharmony_ci * Called from cache_reap() to regularly drain alien caches round robin. 
70362306a36Sopenharmony_ci */ 70462306a36Sopenharmony_cistatic void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n) 70562306a36Sopenharmony_ci{ 70662306a36Sopenharmony_ci int node = __this_cpu_read(slab_reap_node); 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci if (n->alien) { 70962306a36Sopenharmony_ci struct alien_cache *alc = n->alien[node]; 71062306a36Sopenharmony_ci struct array_cache *ac; 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci if (alc) { 71362306a36Sopenharmony_ci ac = &alc->ac; 71462306a36Sopenharmony_ci if (ac->avail && spin_trylock_irq(&alc->lock)) { 71562306a36Sopenharmony_ci LIST_HEAD(list); 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci __drain_alien_cache(cachep, ac, node, &list); 71862306a36Sopenharmony_ci spin_unlock_irq(&alc->lock); 71962306a36Sopenharmony_ci slabs_destroy(cachep, &list); 72062306a36Sopenharmony_ci } 72162306a36Sopenharmony_ci } 72262306a36Sopenharmony_ci } 72362306a36Sopenharmony_ci} 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_cistatic void drain_alien_cache(struct kmem_cache *cachep, 72662306a36Sopenharmony_ci struct alien_cache **alien) 72762306a36Sopenharmony_ci{ 72862306a36Sopenharmony_ci int i = 0; 72962306a36Sopenharmony_ci struct alien_cache *alc; 73062306a36Sopenharmony_ci struct array_cache *ac; 73162306a36Sopenharmony_ci unsigned long flags; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci for_each_online_node(i) { 73462306a36Sopenharmony_ci alc = alien[i]; 73562306a36Sopenharmony_ci if (alc) { 73662306a36Sopenharmony_ci LIST_HEAD(list); 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci ac = &alc->ac; 73962306a36Sopenharmony_ci spin_lock_irqsave(&alc->lock, flags); 74062306a36Sopenharmony_ci __drain_alien_cache(cachep, ac, i, &list); 74162306a36Sopenharmony_ci spin_unlock_irqrestore(&alc->lock, flags); 74262306a36Sopenharmony_ci slabs_destroy(cachep, &list); 74362306a36Sopenharmony_ci } 74462306a36Sopenharmony_ci } 74562306a36Sopenharmony_ci} 
74662306a36Sopenharmony_ci 74762306a36Sopenharmony_cistatic int __cache_free_alien(struct kmem_cache *cachep, void *objp, 74862306a36Sopenharmony_ci int node, int slab_node) 74962306a36Sopenharmony_ci{ 75062306a36Sopenharmony_ci struct kmem_cache_node *n; 75162306a36Sopenharmony_ci struct alien_cache *alien = NULL; 75262306a36Sopenharmony_ci struct array_cache *ac; 75362306a36Sopenharmony_ci LIST_HEAD(list); 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci n = get_node(cachep, node); 75662306a36Sopenharmony_ci STATS_INC_NODEFREES(cachep); 75762306a36Sopenharmony_ci if (n->alien && n->alien[slab_node]) { 75862306a36Sopenharmony_ci alien = n->alien[slab_node]; 75962306a36Sopenharmony_ci ac = &alien->ac; 76062306a36Sopenharmony_ci spin_lock(&alien->lock); 76162306a36Sopenharmony_ci if (unlikely(ac->avail == ac->limit)) { 76262306a36Sopenharmony_ci STATS_INC_ACOVERFLOW(cachep); 76362306a36Sopenharmony_ci __drain_alien_cache(cachep, ac, slab_node, &list); 76462306a36Sopenharmony_ci } 76562306a36Sopenharmony_ci __free_one(ac, objp); 76662306a36Sopenharmony_ci spin_unlock(&alien->lock); 76762306a36Sopenharmony_ci slabs_destroy(cachep, &list); 76862306a36Sopenharmony_ci } else { 76962306a36Sopenharmony_ci n = get_node(cachep, slab_node); 77062306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 77162306a36Sopenharmony_ci free_block(cachep, &objp, 1, slab_node, &list); 77262306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 77362306a36Sopenharmony_ci slabs_destroy(cachep, &list); 77462306a36Sopenharmony_ci } 77562306a36Sopenharmony_ci return 1; 77662306a36Sopenharmony_ci} 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_cistatic inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 77962306a36Sopenharmony_ci{ 78062306a36Sopenharmony_ci int slab_node = slab_nid(virt_to_slab(objp)); 78162306a36Sopenharmony_ci int node = numa_mem_id(); 78262306a36Sopenharmony_ci /* 78362306a36Sopenharmony_ci * Make sure we are not freeing an object from another node to 
the array 78462306a36Sopenharmony_ci * cache on this cpu. 78562306a36Sopenharmony_ci */ 78662306a36Sopenharmony_ci if (likely(node == slab_node)) 78762306a36Sopenharmony_ci return 0; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci return __cache_free_alien(cachep, objp, node, slab_node); 79062306a36Sopenharmony_ci} 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci/* 79362306a36Sopenharmony_ci * Construct gfp mask to allocate from a specific node but do not reclaim or 79462306a36Sopenharmony_ci * warn about failures. 79562306a36Sopenharmony_ci */ 79662306a36Sopenharmony_cistatic inline gfp_t gfp_exact_node(gfp_t flags) 79762306a36Sopenharmony_ci{ 79862306a36Sopenharmony_ci return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL); 79962306a36Sopenharmony_ci} 80062306a36Sopenharmony_ci#endif 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_cistatic int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) 80362306a36Sopenharmony_ci{ 80462306a36Sopenharmony_ci struct kmem_cache_node *n; 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci /* 80762306a36Sopenharmony_ci * Set up the kmem_cache_node for cpu before we can 80862306a36Sopenharmony_ci * begin anything. 
Make sure some other cpu on this 80962306a36Sopenharmony_ci * node has not already allocated this 81062306a36Sopenharmony_ci */ 81162306a36Sopenharmony_ci n = get_node(cachep, node); 81262306a36Sopenharmony_ci if (n) { 81362306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 81462306a36Sopenharmony_ci n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + 81562306a36Sopenharmony_ci cachep->num; 81662306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci return 0; 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); 82262306a36Sopenharmony_ci if (!n) 82362306a36Sopenharmony_ci return -ENOMEM; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci kmem_cache_node_init(n); 82662306a36Sopenharmony_ci n->next_reap = jiffies + REAPTIMEOUT_NODE + 82762306a36Sopenharmony_ci ((unsigned long)cachep) % REAPTIMEOUT_NODE; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci n->free_limit = 83062306a36Sopenharmony_ci (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci /* 83362306a36Sopenharmony_ci * The kmem_cache_nodes don't come and go as CPUs 83462306a36Sopenharmony_ci * come and go. slab_mutex provides sufficient 83562306a36Sopenharmony_ci * protection here. 83662306a36Sopenharmony_ci */ 83762306a36Sopenharmony_ci cachep->node[node] = n; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci return 0; 84062306a36Sopenharmony_ci} 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci#if defined(CONFIG_NUMA) || defined(CONFIG_SMP) 84362306a36Sopenharmony_ci/* 84462306a36Sopenharmony_ci * Allocates and initializes node for a node on each slab cache, used for 84562306a36Sopenharmony_ci * either memory or cpu hotplug. 
If memory is being hot-added, the kmem_cache_node 84662306a36Sopenharmony_ci * will be allocated off-node since memory is not yet online for the new node. 84762306a36Sopenharmony_ci * When hotplugging memory or a cpu, existing nodes are not replaced if 84862306a36Sopenharmony_ci * already in use. 84962306a36Sopenharmony_ci * 85062306a36Sopenharmony_ci * Must hold slab_mutex. 85162306a36Sopenharmony_ci */ 85262306a36Sopenharmony_cistatic int init_cache_node_node(int node) 85362306a36Sopenharmony_ci{ 85462306a36Sopenharmony_ci int ret; 85562306a36Sopenharmony_ci struct kmem_cache *cachep; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 85862306a36Sopenharmony_ci ret = init_cache_node(cachep, node, GFP_KERNEL); 85962306a36Sopenharmony_ci if (ret) 86062306a36Sopenharmony_ci return ret; 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci return 0; 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ci#endif 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_cistatic int setup_kmem_cache_node(struct kmem_cache *cachep, 86862306a36Sopenharmony_ci int node, gfp_t gfp, bool force_change) 86962306a36Sopenharmony_ci{ 87062306a36Sopenharmony_ci int ret = -ENOMEM; 87162306a36Sopenharmony_ci struct kmem_cache_node *n; 87262306a36Sopenharmony_ci struct array_cache *old_shared = NULL; 87362306a36Sopenharmony_ci struct array_cache *new_shared = NULL; 87462306a36Sopenharmony_ci struct alien_cache **new_alien = NULL; 87562306a36Sopenharmony_ci LIST_HEAD(list); 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci if (use_alien_caches) { 87862306a36Sopenharmony_ci new_alien = alloc_alien_cache(node, cachep->limit, gfp); 87962306a36Sopenharmony_ci if (!new_alien) 88062306a36Sopenharmony_ci goto fail; 88162306a36Sopenharmony_ci } 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci if (cachep->shared) { 88462306a36Sopenharmony_ci new_shared = alloc_arraycache(node, 88562306a36Sopenharmony_ci 
cachep->shared * cachep->batchcount, 0xbaadf00d, gfp); 88662306a36Sopenharmony_ci if (!new_shared) 88762306a36Sopenharmony_ci goto fail; 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci ret = init_cache_node(cachep, node, gfp); 89162306a36Sopenharmony_ci if (ret) 89262306a36Sopenharmony_ci goto fail; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci n = get_node(cachep, node); 89562306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 89662306a36Sopenharmony_ci if (n->shared && force_change) { 89762306a36Sopenharmony_ci free_block(cachep, n->shared->entry, 89862306a36Sopenharmony_ci n->shared->avail, node, &list); 89962306a36Sopenharmony_ci n->shared->avail = 0; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci if (!n->shared || force_change) { 90362306a36Sopenharmony_ci old_shared = n->shared; 90462306a36Sopenharmony_ci n->shared = new_shared; 90562306a36Sopenharmony_ci new_shared = NULL; 90662306a36Sopenharmony_ci } 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci if (!n->alien) { 90962306a36Sopenharmony_ci n->alien = new_alien; 91062306a36Sopenharmony_ci new_alien = NULL; 91162306a36Sopenharmony_ci } 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 91462306a36Sopenharmony_ci slabs_destroy(cachep, &list); 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci /* 91762306a36Sopenharmony_ci * To protect lockless access to n->shared during irq disabled context. 91862306a36Sopenharmony_ci * If n->shared isn't NULL in irq disabled context, accessing to it is 91962306a36Sopenharmony_ci * guaranteed to be valid until irq is re-enabled, because it will be 92062306a36Sopenharmony_ci * freed after synchronize_rcu(). 
92162306a36Sopenharmony_ci */ 92262306a36Sopenharmony_ci if (old_shared && force_change) 92362306a36Sopenharmony_ci synchronize_rcu(); 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_cifail: 92662306a36Sopenharmony_ci kfree(old_shared); 92762306a36Sopenharmony_ci kfree(new_shared); 92862306a36Sopenharmony_ci free_alien_cache(new_alien); 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci return ret; 93162306a36Sopenharmony_ci} 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci#ifdef CONFIG_SMP 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_cistatic void cpuup_canceled(long cpu) 93662306a36Sopenharmony_ci{ 93762306a36Sopenharmony_ci struct kmem_cache *cachep; 93862306a36Sopenharmony_ci struct kmem_cache_node *n = NULL; 93962306a36Sopenharmony_ci int node = cpu_to_mem(cpu); 94062306a36Sopenharmony_ci const struct cpumask *mask = cpumask_of_node(node); 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 94362306a36Sopenharmony_ci struct array_cache *nc; 94462306a36Sopenharmony_ci struct array_cache *shared; 94562306a36Sopenharmony_ci struct alien_cache **alien; 94662306a36Sopenharmony_ci LIST_HEAD(list); 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci n = get_node(cachep, node); 94962306a36Sopenharmony_ci if (!n) 95062306a36Sopenharmony_ci continue; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci /* Free limit for this kmem_cache_node */ 95562306a36Sopenharmony_ci n->free_limit -= cachep->batchcount; 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci /* cpu is dead; no one can alloc from it. 
*/ 95862306a36Sopenharmony_ci nc = per_cpu_ptr(cachep->cpu_cache, cpu); 95962306a36Sopenharmony_ci free_block(cachep, nc->entry, nc->avail, node, &list); 96062306a36Sopenharmony_ci nc->avail = 0; 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci if (!cpumask_empty(mask)) { 96362306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 96462306a36Sopenharmony_ci goto free_slab; 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci shared = n->shared; 96862306a36Sopenharmony_ci if (shared) { 96962306a36Sopenharmony_ci free_block(cachep, shared->entry, 97062306a36Sopenharmony_ci shared->avail, node, &list); 97162306a36Sopenharmony_ci n->shared = NULL; 97262306a36Sopenharmony_ci } 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci alien = n->alien; 97562306a36Sopenharmony_ci n->alien = NULL; 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci kfree(shared); 98062306a36Sopenharmony_ci if (alien) { 98162306a36Sopenharmony_ci drain_alien_cache(cachep, alien); 98262306a36Sopenharmony_ci free_alien_cache(alien); 98362306a36Sopenharmony_ci } 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_cifree_slab: 98662306a36Sopenharmony_ci slabs_destroy(cachep, &list); 98762306a36Sopenharmony_ci } 98862306a36Sopenharmony_ci /* 98962306a36Sopenharmony_ci * In the previous loop, all the objects were freed to 99062306a36Sopenharmony_ci * the respective cache's slabs, now we can go ahead and 99162306a36Sopenharmony_ci * shrink each nodelist to its limit. 
99262306a36Sopenharmony_ci */ 99362306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 99462306a36Sopenharmony_ci n = get_node(cachep, node); 99562306a36Sopenharmony_ci if (!n) 99662306a36Sopenharmony_ci continue; 99762306a36Sopenharmony_ci drain_freelist(cachep, n, INT_MAX); 99862306a36Sopenharmony_ci } 99962306a36Sopenharmony_ci} 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_cistatic int cpuup_prepare(long cpu) 100262306a36Sopenharmony_ci{ 100362306a36Sopenharmony_ci struct kmem_cache *cachep; 100462306a36Sopenharmony_ci int node = cpu_to_mem(cpu); 100562306a36Sopenharmony_ci int err; 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci /* 100862306a36Sopenharmony_ci * We need to do this right in the beginning since 100962306a36Sopenharmony_ci * alloc_arraycache's are going to use this list. 101062306a36Sopenharmony_ci * kmalloc_node allows us to add the slab to the right 101162306a36Sopenharmony_ci * kmem_cache_node and not this cpu's kmem_cache_node 101262306a36Sopenharmony_ci */ 101362306a36Sopenharmony_ci err = init_cache_node_node(node); 101462306a36Sopenharmony_ci if (err < 0) 101562306a36Sopenharmony_ci goto bad; 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci /* 101862306a36Sopenharmony_ci * Now we can go ahead with allocating the shared arrays and 101962306a36Sopenharmony_ci * array caches 102062306a36Sopenharmony_ci */ 102162306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 102262306a36Sopenharmony_ci err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false); 102362306a36Sopenharmony_ci if (err) 102462306a36Sopenharmony_ci goto bad; 102562306a36Sopenharmony_ci } 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci return 0; 102862306a36Sopenharmony_cibad: 102962306a36Sopenharmony_ci cpuup_canceled(cpu); 103062306a36Sopenharmony_ci return -ENOMEM; 103162306a36Sopenharmony_ci} 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ciint slab_prepare_cpu(unsigned int cpu) 
103462306a36Sopenharmony_ci{ 103562306a36Sopenharmony_ci int err; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci mutex_lock(&slab_mutex); 103862306a36Sopenharmony_ci err = cpuup_prepare(cpu); 103962306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 104062306a36Sopenharmony_ci return err; 104162306a36Sopenharmony_ci} 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci/* 104462306a36Sopenharmony_ci * This is called for a failed online attempt and for a successful 104562306a36Sopenharmony_ci * offline. 104662306a36Sopenharmony_ci * 104762306a36Sopenharmony_ci * Even if all the cpus of a node are down, we don't free the 104862306a36Sopenharmony_ci * kmem_cache_node of any cache. This is to avoid a race between cpu_down, and 104962306a36Sopenharmony_ci * a kmalloc allocation from another cpu for memory from the node of 105062306a36Sopenharmony_ci * the cpu going down. The kmem_cache_node structure is usually allocated from 105162306a36Sopenharmony_ci * kmem_cache_create() and gets destroyed at kmem_cache_destroy(). 105262306a36Sopenharmony_ci */ 105362306a36Sopenharmony_ciint slab_dead_cpu(unsigned int cpu) 105462306a36Sopenharmony_ci{ 105562306a36Sopenharmony_ci mutex_lock(&slab_mutex); 105662306a36Sopenharmony_ci cpuup_canceled(cpu); 105762306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 105862306a36Sopenharmony_ci return 0; 105962306a36Sopenharmony_ci} 106062306a36Sopenharmony_ci#endif 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_cistatic int slab_online_cpu(unsigned int cpu) 106362306a36Sopenharmony_ci{ 106462306a36Sopenharmony_ci start_cpu_timer(cpu); 106562306a36Sopenharmony_ci return 0; 106662306a36Sopenharmony_ci} 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_cistatic int slab_offline_cpu(unsigned int cpu) 106962306a36Sopenharmony_ci{ 107062306a36Sopenharmony_ci /* 107162306a36Sopenharmony_ci * Shutdown cache reaper. 
Note that the slab_mutex is held so 107262306a36Sopenharmony_ci * that if cache_reap() is invoked it cannot do anything 107362306a36Sopenharmony_ci * expensive but will only modify reap_work and reschedule the 107462306a36Sopenharmony_ci * timer. 107562306a36Sopenharmony_ci */ 107662306a36Sopenharmony_ci cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu)); 107762306a36Sopenharmony_ci /* Now the cache_reaper is guaranteed to be not running. */ 107862306a36Sopenharmony_ci per_cpu(slab_reap_work, cpu).work.func = NULL; 107962306a36Sopenharmony_ci return 0; 108062306a36Sopenharmony_ci} 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci#if defined(CONFIG_NUMA) 108362306a36Sopenharmony_ci/* 108462306a36Sopenharmony_ci * Drains freelist for a node on each slab cache, used for memory hot-remove. 108562306a36Sopenharmony_ci * Returns -EBUSY if all objects cannot be drained so that the node is not 108662306a36Sopenharmony_ci * removed. 108762306a36Sopenharmony_ci * 108862306a36Sopenharmony_ci * Must hold slab_mutex. 
108962306a36Sopenharmony_ci */ 109062306a36Sopenharmony_cistatic int __meminit drain_cache_node_node(int node) 109162306a36Sopenharmony_ci{ 109262306a36Sopenharmony_ci struct kmem_cache *cachep; 109362306a36Sopenharmony_ci int ret = 0; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 109662306a36Sopenharmony_ci struct kmem_cache_node *n; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci n = get_node(cachep, node); 109962306a36Sopenharmony_ci if (!n) 110062306a36Sopenharmony_ci continue; 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci drain_freelist(cachep, n, INT_MAX); 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci if (!list_empty(&n->slabs_full) || 110562306a36Sopenharmony_ci !list_empty(&n->slabs_partial)) { 110662306a36Sopenharmony_ci ret = -EBUSY; 110762306a36Sopenharmony_ci break; 110862306a36Sopenharmony_ci } 110962306a36Sopenharmony_ci } 111062306a36Sopenharmony_ci return ret; 111162306a36Sopenharmony_ci} 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_cistatic int __meminit slab_memory_callback(struct notifier_block *self, 111462306a36Sopenharmony_ci unsigned long action, void *arg) 111562306a36Sopenharmony_ci{ 111662306a36Sopenharmony_ci struct memory_notify *mnb = arg; 111762306a36Sopenharmony_ci int ret = 0; 111862306a36Sopenharmony_ci int nid; 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci nid = mnb->status_change_nid; 112162306a36Sopenharmony_ci if (nid < 0) 112262306a36Sopenharmony_ci goto out; 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci switch (action) { 112562306a36Sopenharmony_ci case MEM_GOING_ONLINE: 112662306a36Sopenharmony_ci mutex_lock(&slab_mutex); 112762306a36Sopenharmony_ci ret = init_cache_node_node(nid); 112862306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 112962306a36Sopenharmony_ci break; 113062306a36Sopenharmony_ci case MEM_GOING_OFFLINE: 113162306a36Sopenharmony_ci mutex_lock(&slab_mutex); 113262306a36Sopenharmony_ci ret = 
drain_cache_node_node(nid); 113362306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 113462306a36Sopenharmony_ci break; 113562306a36Sopenharmony_ci case MEM_ONLINE: 113662306a36Sopenharmony_ci case MEM_OFFLINE: 113762306a36Sopenharmony_ci case MEM_CANCEL_ONLINE: 113862306a36Sopenharmony_ci case MEM_CANCEL_OFFLINE: 113962306a36Sopenharmony_ci break; 114062306a36Sopenharmony_ci } 114162306a36Sopenharmony_ciout: 114262306a36Sopenharmony_ci return notifier_from_errno(ret); 114362306a36Sopenharmony_ci} 114462306a36Sopenharmony_ci#endif /* CONFIG_NUMA */ 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci/* 114762306a36Sopenharmony_ci * swap the static kmem_cache_node with kmalloced memory 114862306a36Sopenharmony_ci */ 114962306a36Sopenharmony_cistatic void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list, 115062306a36Sopenharmony_ci int nodeid) 115162306a36Sopenharmony_ci{ 115262306a36Sopenharmony_ci struct kmem_cache_node *ptr; 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid); 115562306a36Sopenharmony_ci BUG_ON(!ptr); 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci memcpy(ptr, list, sizeof(struct kmem_cache_node)); 115862306a36Sopenharmony_ci /* 115962306a36Sopenharmony_ci * Do not assume that spinlocks can be initialized via memcpy: 116062306a36Sopenharmony_ci */ 116162306a36Sopenharmony_ci raw_spin_lock_init(&ptr->list_lock); 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ci MAKE_ALL_LISTS(cachep, ptr, nodeid); 116462306a36Sopenharmony_ci cachep->node[nodeid] = ptr; 116562306a36Sopenharmony_ci} 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_ci/* 116862306a36Sopenharmony_ci * For setting up all the kmem_cache_node for cache whose buffer_size is same as 116962306a36Sopenharmony_ci * size of kmem_cache_node. 
117062306a36Sopenharmony_ci */ 117162306a36Sopenharmony_cistatic void __init set_up_node(struct kmem_cache *cachep, int index) 117262306a36Sopenharmony_ci{ 117362306a36Sopenharmony_ci int node; 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci for_each_online_node(node) { 117662306a36Sopenharmony_ci cachep->node[node] = &init_kmem_cache_node[index + node]; 117762306a36Sopenharmony_ci cachep->node[node]->next_reap = jiffies + 117862306a36Sopenharmony_ci REAPTIMEOUT_NODE + 117962306a36Sopenharmony_ci ((unsigned long)cachep) % REAPTIMEOUT_NODE; 118062306a36Sopenharmony_ci } 118162306a36Sopenharmony_ci} 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci/* 118462306a36Sopenharmony_ci * Initialisation. Called after the page allocator have been initialised and 118562306a36Sopenharmony_ci * before smp_init(). 118662306a36Sopenharmony_ci */ 118762306a36Sopenharmony_civoid __init kmem_cache_init(void) 118862306a36Sopenharmony_ci{ 118962306a36Sopenharmony_ci int i; 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci kmem_cache = &kmem_cache_boot; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1) 119462306a36Sopenharmony_ci use_alien_caches = 0; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci for (i = 0; i < NUM_INIT_LISTS; i++) 119762306a36Sopenharmony_ci kmem_cache_node_init(&init_kmem_cache_node[i]); 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci /* 120062306a36Sopenharmony_ci * Fragmentation resistance on low memory - only use bigger 120162306a36Sopenharmony_ci * page orders on machines with more than 32MB of memory if 120262306a36Sopenharmony_ci * not overridden on the command line. 
120362306a36Sopenharmony_ci */ 120462306a36Sopenharmony_ci if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT) 120562306a36Sopenharmony_ci slab_max_order = SLAB_MAX_ORDER_HI; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci /* Bootstrap is tricky, because several objects are allocated 120862306a36Sopenharmony_ci * from caches that do not exist yet: 120962306a36Sopenharmony_ci * 1) initialize the kmem_cache cache: it contains the struct 121062306a36Sopenharmony_ci * kmem_cache structures of all caches, except kmem_cache itself: 121162306a36Sopenharmony_ci * kmem_cache is statically allocated. 121262306a36Sopenharmony_ci * Initially an __init data area is used for the head array and the 121362306a36Sopenharmony_ci * kmem_cache_node structures, it's replaced with a kmalloc allocated 121462306a36Sopenharmony_ci * array at the end of the bootstrap. 121562306a36Sopenharmony_ci * 2) Create the first kmalloc cache. 121662306a36Sopenharmony_ci * The struct kmem_cache for the new cache is allocated normally. 121762306a36Sopenharmony_ci * An __init data area is used for the head array. 121862306a36Sopenharmony_ci * 3) Create the remaining kmalloc caches, with minimally sized 121962306a36Sopenharmony_ci * head arrays. 122062306a36Sopenharmony_ci * 4) Replace the __init data head arrays for kmem_cache and the first 122162306a36Sopenharmony_ci * kmalloc cache with kmalloc allocated arrays. 122262306a36Sopenharmony_ci * 5) Replace the __init data for kmem_cache_node for kmem_cache and 122362306a36Sopenharmony_ci * the other cache's with kmalloc allocated memory. 122462306a36Sopenharmony_ci * 6) Resize the head arrays of the kmalloc caches to their final sizes. 
122562306a36Sopenharmony_ci */ 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci /* 1) create the kmem_cache */ 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci /* 123062306a36Sopenharmony_ci * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids 123162306a36Sopenharmony_ci */ 123262306a36Sopenharmony_ci create_boot_cache(kmem_cache, "kmem_cache", 123362306a36Sopenharmony_ci offsetof(struct kmem_cache, node) + 123462306a36Sopenharmony_ci nr_node_ids * sizeof(struct kmem_cache_node *), 123562306a36Sopenharmony_ci SLAB_HWCACHE_ALIGN, 0, 0); 123662306a36Sopenharmony_ci list_add(&kmem_cache->list, &slab_caches); 123762306a36Sopenharmony_ci slab_state = PARTIAL; 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_ci /* 124062306a36Sopenharmony_ci * Initialize the caches that provide memory for the kmem_cache_node 124162306a36Sopenharmony_ci * structures first. Without this, further allocations will bug. 124262306a36Sopenharmony_ci */ 124362306a36Sopenharmony_ci new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS); 124462306a36Sopenharmony_ci slab_state = PARTIAL_NODE; 124562306a36Sopenharmony_ci setup_kmalloc_cache_index_table(); 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci /* 5) Replace the bootstrap kmem_cache_node */ 124862306a36Sopenharmony_ci { 124962306a36Sopenharmony_ci int nid; 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci for_each_online_node(nid) { 125262306a36Sopenharmony_ci init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid); 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE], 125562306a36Sopenharmony_ci &init_kmem_cache_node[SIZE_NODE + nid], nid); 125662306a36Sopenharmony_ci } 125762306a36Sopenharmony_ci } 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci create_kmalloc_caches(ARCH_KMALLOC_FLAGS); 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_civoid __init kmem_cache_init_late(void) 
126362306a36Sopenharmony_ci{ 126462306a36Sopenharmony_ci struct kmem_cache *cachep; 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci /* 6) resize the head arrays to their final sizes */ 126762306a36Sopenharmony_ci mutex_lock(&slab_mutex); 126862306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) 126962306a36Sopenharmony_ci if (enable_cpucache(cachep, GFP_NOWAIT)) 127062306a36Sopenharmony_ci BUG(); 127162306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci /* Done! */ 127462306a36Sopenharmony_ci slab_state = FULL; 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci#ifdef CONFIG_NUMA 127762306a36Sopenharmony_ci /* 127862306a36Sopenharmony_ci * Register a memory hotplug callback that initializes and frees 127962306a36Sopenharmony_ci * node. 128062306a36Sopenharmony_ci */ 128162306a36Sopenharmony_ci hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 128262306a36Sopenharmony_ci#endif 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci /* 128562306a36Sopenharmony_ci * The reap timers are started later, with a module init call: That part 128662306a36Sopenharmony_ci * of the kernel is not yet operational. 
128762306a36Sopenharmony_ci */ 128862306a36Sopenharmony_ci} 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_cistatic int __init cpucache_init(void) 129162306a36Sopenharmony_ci{ 129262306a36Sopenharmony_ci int ret; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci /* 129562306a36Sopenharmony_ci * Register the timers that return unneeded pages to the page allocator 129662306a36Sopenharmony_ci */ 129762306a36Sopenharmony_ci ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online", 129862306a36Sopenharmony_ci slab_online_cpu, slab_offline_cpu); 129962306a36Sopenharmony_ci WARN_ON(ret < 0); 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci return 0; 130262306a36Sopenharmony_ci} 130362306a36Sopenharmony_ci__initcall(cpucache_init); 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_cistatic noinline void 130662306a36Sopenharmony_cislab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) 130762306a36Sopenharmony_ci{ 130862306a36Sopenharmony_ci#if DEBUG 130962306a36Sopenharmony_ci struct kmem_cache_node *n; 131062306a36Sopenharmony_ci unsigned long flags; 131162306a36Sopenharmony_ci int node; 131262306a36Sopenharmony_ci static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL, 131362306a36Sopenharmony_ci DEFAULT_RATELIMIT_BURST); 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs)) 131662306a36Sopenharmony_ci return; 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_ci pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", 131962306a36Sopenharmony_ci nodeid, gfpflags, &gfpflags); 132062306a36Sopenharmony_ci pr_warn(" cache: %s, object size: %d, order: %d\n", 132162306a36Sopenharmony_ci cachep->name, cachep->size, cachep->gfporder); 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci for_each_kmem_cache_node(cachep, node, n) { 132462306a36Sopenharmony_ci unsigned long total_slabs, free_slabs, free_objs; 132562306a36Sopenharmony_ci 
132662306a36Sopenharmony_ci raw_spin_lock_irqsave(&n->list_lock, flags); 132762306a36Sopenharmony_ci total_slabs = n->total_slabs; 132862306a36Sopenharmony_ci free_slabs = n->free_slabs; 132962306a36Sopenharmony_ci free_objs = n->free_objects; 133062306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&n->list_lock, flags); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", 133362306a36Sopenharmony_ci node, total_slabs - free_slabs, total_slabs, 133462306a36Sopenharmony_ci (total_slabs * cachep->num) - free_objs, 133562306a36Sopenharmony_ci total_slabs * cachep->num); 133662306a36Sopenharmony_ci } 133762306a36Sopenharmony_ci#endif 133862306a36Sopenharmony_ci} 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci/* 134162306a36Sopenharmony_ci * Interface to system's page allocator. No need to hold the 134262306a36Sopenharmony_ci * kmem_cache_node ->list_lock. 134362306a36Sopenharmony_ci * 134462306a36Sopenharmony_ci * If we requested dmaable memory, we will get it. Even if we 134562306a36Sopenharmony_ci * did not request dmaable memory, we might get it, but that 134662306a36Sopenharmony_ci * would be relatively rare and ignorable. 
134762306a36Sopenharmony_ci */ 134862306a36Sopenharmony_cistatic struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, 134962306a36Sopenharmony_ci int nodeid) 135062306a36Sopenharmony_ci{ 135162306a36Sopenharmony_ci struct folio *folio; 135262306a36Sopenharmony_ci struct slab *slab; 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci flags |= cachep->allocflags; 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder); 135762306a36Sopenharmony_ci if (!folio) { 135862306a36Sopenharmony_ci slab_out_of_memory(cachep, flags, nodeid); 135962306a36Sopenharmony_ci return NULL; 136062306a36Sopenharmony_ci } 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci slab = folio_slab(folio); 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci account_slab(slab, cachep->gfporder, cachep, flags); 136562306a36Sopenharmony_ci __folio_set_slab(folio); 136662306a36Sopenharmony_ci /* Make the flag visible before any changes to folio->mapping */ 136762306a36Sopenharmony_ci smp_wmb(); 136862306a36Sopenharmony_ci /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ 136962306a36Sopenharmony_ci if (sk_memalloc_socks() && folio_is_pfmemalloc(folio)) 137062306a36Sopenharmony_ci slab_set_pfmemalloc(slab); 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci return slab; 137362306a36Sopenharmony_ci} 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci/* 137662306a36Sopenharmony_ci * Interface to system's page release. 
137762306a36Sopenharmony_ci */ 137862306a36Sopenharmony_cistatic void kmem_freepages(struct kmem_cache *cachep, struct slab *slab) 137962306a36Sopenharmony_ci{ 138062306a36Sopenharmony_ci int order = cachep->gfporder; 138162306a36Sopenharmony_ci struct folio *folio = slab_folio(slab); 138262306a36Sopenharmony_ci 138362306a36Sopenharmony_ci BUG_ON(!folio_test_slab(folio)); 138462306a36Sopenharmony_ci __slab_clear_pfmemalloc(slab); 138562306a36Sopenharmony_ci page_mapcount_reset(&folio->page); 138662306a36Sopenharmony_ci folio->mapping = NULL; 138762306a36Sopenharmony_ci /* Make the mapping reset visible before clearing the flag */ 138862306a36Sopenharmony_ci smp_wmb(); 138962306a36Sopenharmony_ci __folio_clear_slab(folio); 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci mm_account_reclaimed_pages(1 << order); 139262306a36Sopenharmony_ci unaccount_slab(slab, order, cachep); 139362306a36Sopenharmony_ci __free_pages(&folio->page, order); 139462306a36Sopenharmony_ci} 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_cistatic void kmem_rcu_free(struct rcu_head *head) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci struct kmem_cache *cachep; 139962306a36Sopenharmony_ci struct slab *slab; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci slab = container_of(head, struct slab, rcu_head); 140262306a36Sopenharmony_ci cachep = slab->slab_cache; 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci kmem_freepages(cachep, slab); 140562306a36Sopenharmony_ci} 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci#if DEBUG 140862306a36Sopenharmony_cistatic inline bool is_debug_pagealloc_cache(struct kmem_cache *cachep) 140962306a36Sopenharmony_ci{ 141062306a36Sopenharmony_ci return debug_pagealloc_enabled_static() && OFF_SLAB(cachep) && 141162306a36Sopenharmony_ci ((cachep->size % PAGE_SIZE) == 0); 141262306a36Sopenharmony_ci} 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_PAGEALLOC 141562306a36Sopenharmony_cistatic void 
slab_kernel_map(struct kmem_cache *cachep, void *objp, int map) 141662306a36Sopenharmony_ci{ 141762306a36Sopenharmony_ci if (!is_debug_pagealloc_cache(cachep)) 141862306a36Sopenharmony_ci return; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); 142162306a36Sopenharmony_ci} 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci#else 142462306a36Sopenharmony_cistatic inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, 142562306a36Sopenharmony_ci int map) {} 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci#endif 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_cistatic void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 143062306a36Sopenharmony_ci{ 143162306a36Sopenharmony_ci int size = cachep->object_size; 143262306a36Sopenharmony_ci addr = &((char *)addr)[obj_offset(cachep)]; 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci memset(addr, val, size); 143562306a36Sopenharmony_ci *(unsigned char *)(addr + size - 1) = POISON_END; 143662306a36Sopenharmony_ci} 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_cistatic void dump_line(char *data, int offset, int limit) 143962306a36Sopenharmony_ci{ 144062306a36Sopenharmony_ci int i; 144162306a36Sopenharmony_ci unsigned char error = 0; 144262306a36Sopenharmony_ci int bad_count = 0; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci pr_err("%03x: ", offset); 144562306a36Sopenharmony_ci for (i = 0; i < limit; i++) { 144662306a36Sopenharmony_ci if (data[offset + i] != POISON_FREE) { 144762306a36Sopenharmony_ci error = data[offset + i]; 144862306a36Sopenharmony_ci bad_count++; 144962306a36Sopenharmony_ci } 145062306a36Sopenharmony_ci } 145162306a36Sopenharmony_ci print_hex_dump(KERN_CONT, "", 0, 16, 1, 145262306a36Sopenharmony_ci &data[offset], limit, 1); 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci if (bad_count == 1) { 145562306a36Sopenharmony_ci error ^= POISON_FREE; 
145662306a36Sopenharmony_ci if (!(error & (error - 1))) { 145762306a36Sopenharmony_ci pr_err("Single bit error detected. Probably bad RAM.\n"); 145862306a36Sopenharmony_ci#ifdef CONFIG_X86 145962306a36Sopenharmony_ci pr_err("Run memtest86+ or a similar memory test tool.\n"); 146062306a36Sopenharmony_ci#else 146162306a36Sopenharmony_ci pr_err("Run a memory test tool.\n"); 146262306a36Sopenharmony_ci#endif 146362306a36Sopenharmony_ci } 146462306a36Sopenharmony_ci } 146562306a36Sopenharmony_ci} 146662306a36Sopenharmony_ci#endif 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci#if DEBUG 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_cistatic void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) 147162306a36Sopenharmony_ci{ 147262306a36Sopenharmony_ci int i, size; 147362306a36Sopenharmony_ci char *realobj; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 147662306a36Sopenharmony_ci pr_err("Redzone: 0x%llx/0x%llx\n", 147762306a36Sopenharmony_ci *dbg_redzone1(cachep, objp), 147862306a36Sopenharmony_ci *dbg_redzone2(cachep, objp)); 147962306a36Sopenharmony_ci } 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci if (cachep->flags & SLAB_STORE_USER) 148262306a36Sopenharmony_ci pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp)); 148362306a36Sopenharmony_ci realobj = (char *)objp + obj_offset(cachep); 148462306a36Sopenharmony_ci size = cachep->object_size; 148562306a36Sopenharmony_ci for (i = 0; i < size && lines; i += 16, lines--) { 148662306a36Sopenharmony_ci int limit; 148762306a36Sopenharmony_ci limit = 16; 148862306a36Sopenharmony_ci if (i + limit > size) 148962306a36Sopenharmony_ci limit = size - i; 149062306a36Sopenharmony_ci dump_line(realobj, i, limit); 149162306a36Sopenharmony_ci } 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_cistatic void check_poison_obj(struct kmem_cache *cachep, void *objp) 149562306a36Sopenharmony_ci{ 
149662306a36Sopenharmony_ci char *realobj; 149762306a36Sopenharmony_ci int size, i; 149862306a36Sopenharmony_ci int lines = 0; 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci if (is_debug_pagealloc_cache(cachep)) 150162306a36Sopenharmony_ci return; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci realobj = (char *)objp + obj_offset(cachep); 150462306a36Sopenharmony_ci size = cachep->object_size; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci for (i = 0; i < size; i++) { 150762306a36Sopenharmony_ci char exp = POISON_FREE; 150862306a36Sopenharmony_ci if (i == size - 1) 150962306a36Sopenharmony_ci exp = POISON_END; 151062306a36Sopenharmony_ci if (realobj[i] != exp) { 151162306a36Sopenharmony_ci int limit; 151262306a36Sopenharmony_ci /* Mismatch ! */ 151362306a36Sopenharmony_ci /* Print header */ 151462306a36Sopenharmony_ci if (lines == 0) { 151562306a36Sopenharmony_ci pr_err("Slab corruption (%s): %s start=%px, len=%d\n", 151662306a36Sopenharmony_ci print_tainted(), cachep->name, 151762306a36Sopenharmony_ci realobj, size); 151862306a36Sopenharmony_ci print_objinfo(cachep, objp, 0); 151962306a36Sopenharmony_ci } 152062306a36Sopenharmony_ci /* Hexdump the affected line */ 152162306a36Sopenharmony_ci i = (i / 16) * 16; 152262306a36Sopenharmony_ci limit = 16; 152362306a36Sopenharmony_ci if (i + limit > size) 152462306a36Sopenharmony_ci limit = size - i; 152562306a36Sopenharmony_ci dump_line(realobj, i, limit); 152662306a36Sopenharmony_ci i += 16; 152762306a36Sopenharmony_ci lines++; 152862306a36Sopenharmony_ci /* Limit to 5 lines */ 152962306a36Sopenharmony_ci if (lines > 5) 153062306a36Sopenharmony_ci break; 153162306a36Sopenharmony_ci } 153262306a36Sopenharmony_ci } 153362306a36Sopenharmony_ci if (lines != 0) { 153462306a36Sopenharmony_ci /* Print some data about the neighboring objects, if they 153562306a36Sopenharmony_ci * exist: 153662306a36Sopenharmony_ci */ 153762306a36Sopenharmony_ci struct slab *slab = virt_to_slab(objp); 
153862306a36Sopenharmony_ci unsigned int objnr; 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci objnr = obj_to_index(cachep, slab, objp); 154162306a36Sopenharmony_ci if (objnr) { 154262306a36Sopenharmony_ci objp = index_to_obj(cachep, slab, objnr - 1); 154362306a36Sopenharmony_ci realobj = (char *)objp + obj_offset(cachep); 154462306a36Sopenharmony_ci pr_err("Prev obj: start=%px, len=%d\n", realobj, size); 154562306a36Sopenharmony_ci print_objinfo(cachep, objp, 2); 154662306a36Sopenharmony_ci } 154762306a36Sopenharmony_ci if (objnr + 1 < cachep->num) { 154862306a36Sopenharmony_ci objp = index_to_obj(cachep, slab, objnr + 1); 154962306a36Sopenharmony_ci realobj = (char *)objp + obj_offset(cachep); 155062306a36Sopenharmony_ci pr_err("Next obj: start=%px, len=%d\n", realobj, size); 155162306a36Sopenharmony_ci print_objinfo(cachep, objp, 2); 155262306a36Sopenharmony_ci } 155362306a36Sopenharmony_ci } 155462306a36Sopenharmony_ci} 155562306a36Sopenharmony_ci#endif 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci#if DEBUG 155862306a36Sopenharmony_cistatic void slab_destroy_debugcheck(struct kmem_cache *cachep, 155962306a36Sopenharmony_ci struct slab *slab) 156062306a36Sopenharmony_ci{ 156162306a36Sopenharmony_ci int i; 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { 156462306a36Sopenharmony_ci poison_obj(cachep, slab->freelist - obj_offset(cachep), 156562306a36Sopenharmony_ci POISON_FREE); 156662306a36Sopenharmony_ci } 156762306a36Sopenharmony_ci 156862306a36Sopenharmony_ci for (i = 0; i < cachep->num; i++) { 156962306a36Sopenharmony_ci void *objp = index_to_obj(cachep, slab, i); 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci if (cachep->flags & SLAB_POISON) { 157262306a36Sopenharmony_ci check_poison_obj(cachep, objp); 157362306a36Sopenharmony_ci slab_kernel_map(cachep, objp, 1); 157462306a36Sopenharmony_ci } 157562306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 
157662306a36Sopenharmony_ci if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 157762306a36Sopenharmony_ci slab_error(cachep, "start of a freed object was overwritten"); 157862306a36Sopenharmony_ci if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 157962306a36Sopenharmony_ci slab_error(cachep, "end of a freed object was overwritten"); 158062306a36Sopenharmony_ci } 158162306a36Sopenharmony_ci } 158262306a36Sopenharmony_ci} 158362306a36Sopenharmony_ci#else 158462306a36Sopenharmony_cistatic void slab_destroy_debugcheck(struct kmem_cache *cachep, 158562306a36Sopenharmony_ci struct slab *slab) 158662306a36Sopenharmony_ci{ 158762306a36Sopenharmony_ci} 158862306a36Sopenharmony_ci#endif 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci/** 159162306a36Sopenharmony_ci * slab_destroy - destroy and release all objects in a slab 159262306a36Sopenharmony_ci * @cachep: cache pointer being destroyed 159362306a36Sopenharmony_ci * @slab: slab being destroyed 159462306a36Sopenharmony_ci * 159562306a36Sopenharmony_ci * Destroy all the objs in a slab, and release the mem back to the system. 159662306a36Sopenharmony_ci * Before calling the slab must have been unlinked from the cache. The 159762306a36Sopenharmony_ci * kmem_cache_node ->list_lock is not held/needed. 
159862306a36Sopenharmony_ci */ 159962306a36Sopenharmony_cistatic void slab_destroy(struct kmem_cache *cachep, struct slab *slab) 160062306a36Sopenharmony_ci{ 160162306a36Sopenharmony_ci void *freelist; 160262306a36Sopenharmony_ci 160362306a36Sopenharmony_ci freelist = slab->freelist; 160462306a36Sopenharmony_ci slab_destroy_debugcheck(cachep, slab); 160562306a36Sopenharmony_ci if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) 160662306a36Sopenharmony_ci call_rcu(&slab->rcu_head, kmem_rcu_free); 160762306a36Sopenharmony_ci else 160862306a36Sopenharmony_ci kmem_freepages(cachep, slab); 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci /* 161162306a36Sopenharmony_ci * From now on, we don't use freelist 161262306a36Sopenharmony_ci * although actual page can be freed in rcu context 161362306a36Sopenharmony_ci */ 161462306a36Sopenharmony_ci if (OFF_SLAB(cachep)) 161562306a36Sopenharmony_ci kfree(freelist); 161662306a36Sopenharmony_ci} 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci/* 161962306a36Sopenharmony_ci * Update the size of the caches before calling slabs_destroy as it may 162062306a36Sopenharmony_ci * recursively call kfree. 162162306a36Sopenharmony_ci */ 162262306a36Sopenharmony_cistatic void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) 162362306a36Sopenharmony_ci{ 162462306a36Sopenharmony_ci struct slab *slab, *n; 162562306a36Sopenharmony_ci 162662306a36Sopenharmony_ci list_for_each_entry_safe(slab, n, list, slab_list) { 162762306a36Sopenharmony_ci list_del(&slab->slab_list); 162862306a36Sopenharmony_ci slab_destroy(cachep, slab); 162962306a36Sopenharmony_ci } 163062306a36Sopenharmony_ci} 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci/** 163362306a36Sopenharmony_ci * calculate_slab_order - calculate size (page order) of slabs 163462306a36Sopenharmony_ci * @cachep: pointer to the cache that is being created 163562306a36Sopenharmony_ci * @size: size of objects to be created in this cache. 
163662306a36Sopenharmony_ci * @flags: slab allocation flags 163762306a36Sopenharmony_ci * 163862306a36Sopenharmony_ci * Also calculates the number of objects per slab. 163962306a36Sopenharmony_ci * 164062306a36Sopenharmony_ci * This could be made much more intelligent. For now, try to avoid using 164162306a36Sopenharmony_ci * high order pages for slabs. When the gfp() functions are more friendly 164262306a36Sopenharmony_ci * towards high-order requests, this should be changed. 164362306a36Sopenharmony_ci * 164462306a36Sopenharmony_ci * Return: number of left-over bytes in a slab 164562306a36Sopenharmony_ci */ 164662306a36Sopenharmony_cistatic size_t calculate_slab_order(struct kmem_cache *cachep, 164762306a36Sopenharmony_ci size_t size, slab_flags_t flags) 164862306a36Sopenharmony_ci{ 164962306a36Sopenharmony_ci size_t left_over = 0; 165062306a36Sopenharmony_ci int gfporder; 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ci for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) { 165362306a36Sopenharmony_ci unsigned int num; 165462306a36Sopenharmony_ci size_t remainder; 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci num = cache_estimate(gfporder, size, flags, &remainder); 165762306a36Sopenharmony_ci if (!num) 165862306a36Sopenharmony_ci continue; 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */ 166162306a36Sopenharmony_ci if (num > SLAB_OBJ_MAX_NUM) 166262306a36Sopenharmony_ci break; 166362306a36Sopenharmony_ci 166462306a36Sopenharmony_ci if (flags & CFLGS_OFF_SLAB) { 166562306a36Sopenharmony_ci struct kmem_cache *freelist_cache; 166662306a36Sopenharmony_ci size_t freelist_size; 166762306a36Sopenharmony_ci size_t freelist_cache_size; 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci freelist_size = num * sizeof(freelist_idx_t); 167062306a36Sopenharmony_ci if (freelist_size > KMALLOC_MAX_CACHE_SIZE) { 167162306a36Sopenharmony_ci freelist_cache_size = PAGE_SIZE << 
get_order(freelist_size); 167262306a36Sopenharmony_ci } else { 167362306a36Sopenharmony_ci freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_); 167462306a36Sopenharmony_ci if (!freelist_cache) 167562306a36Sopenharmony_ci continue; 167662306a36Sopenharmony_ci freelist_cache_size = freelist_cache->size; 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci /* 167962306a36Sopenharmony_ci * Needed to avoid possible looping condition 168062306a36Sopenharmony_ci * in cache_grow_begin() 168162306a36Sopenharmony_ci */ 168262306a36Sopenharmony_ci if (OFF_SLAB(freelist_cache)) 168362306a36Sopenharmony_ci continue; 168462306a36Sopenharmony_ci } 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci /* check if off slab has enough benefit */ 168762306a36Sopenharmony_ci if (freelist_cache_size > cachep->size / 2) 168862306a36Sopenharmony_ci continue; 168962306a36Sopenharmony_ci } 169062306a36Sopenharmony_ci 169162306a36Sopenharmony_ci /* Found something acceptable - save it away */ 169262306a36Sopenharmony_ci cachep->num = num; 169362306a36Sopenharmony_ci cachep->gfporder = gfporder; 169462306a36Sopenharmony_ci left_over = remainder; 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ci /* 169762306a36Sopenharmony_ci * A VFS-reclaimable slab tends to have most allocations 169862306a36Sopenharmony_ci * as GFP_NOFS and we really don't want to have to be allocating 169962306a36Sopenharmony_ci * higher-order pages when we are unable to shrink dcache. 170062306a36Sopenharmony_ci */ 170162306a36Sopenharmony_ci if (flags & SLAB_RECLAIM_ACCOUNT) 170262306a36Sopenharmony_ci break; 170362306a36Sopenharmony_ci 170462306a36Sopenharmony_ci /* 170562306a36Sopenharmony_ci * Large number of objects is good, but very large slabs are 170662306a36Sopenharmony_ci * currently bad for the gfp()s. 
170762306a36Sopenharmony_ci */ 170862306a36Sopenharmony_ci if (gfporder >= slab_max_order) 170962306a36Sopenharmony_ci break; 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci /* 171262306a36Sopenharmony_ci * Acceptable internal fragmentation? 171362306a36Sopenharmony_ci */ 171462306a36Sopenharmony_ci if (left_over * 8 <= (PAGE_SIZE << gfporder)) 171562306a36Sopenharmony_ci break; 171662306a36Sopenharmony_ci } 171762306a36Sopenharmony_ci return left_over; 171862306a36Sopenharmony_ci} 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_cistatic struct array_cache __percpu *alloc_kmem_cache_cpus( 172162306a36Sopenharmony_ci struct kmem_cache *cachep, int entries, int batchcount) 172262306a36Sopenharmony_ci{ 172362306a36Sopenharmony_ci int cpu; 172462306a36Sopenharmony_ci size_t size; 172562306a36Sopenharmony_ci struct array_cache __percpu *cpu_cache; 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci size = sizeof(void *) * entries + sizeof(struct array_cache); 172862306a36Sopenharmony_ci cpu_cache = __alloc_percpu(size, sizeof(void *)); 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci if (!cpu_cache) 173162306a36Sopenharmony_ci return NULL; 173262306a36Sopenharmony_ci 173362306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 173462306a36Sopenharmony_ci init_arraycache(per_cpu_ptr(cpu_cache, cpu), 173562306a36Sopenharmony_ci entries, batchcount); 173662306a36Sopenharmony_ci } 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci return cpu_cache; 173962306a36Sopenharmony_ci} 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_cistatic int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) 174262306a36Sopenharmony_ci{ 174362306a36Sopenharmony_ci if (slab_state >= FULL) 174462306a36Sopenharmony_ci return enable_cpucache(cachep, gfp); 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1); 174762306a36Sopenharmony_ci if (!cachep->cpu_cache) 174862306a36Sopenharmony_ci return 1; 

	if (slab_state == DOWN) {
		/* Creation of first cache (kmem_cache). */
		set_up_node(kmem_cache, CACHE_CACHE);
	} else if (slab_state == PARTIAL) {
		/* For kmem_cache_node */
		set_up_node(cachep, SIZE_NODE);
	} else {
		int node;

		/* Allocate and initialise one node structure per online node. */
		for_each_online_node(node) {
			cachep->node[node] = kmalloc_node(
				sizeof(struct kmem_cache_node), gfp, node);
			BUG_ON(!cachep->node[node]);
			kmem_cache_node_init(cachep->node[node]);
		}
	}

	/*
	 * The (address % REAPTIMEOUT_NODE) term makes next_reap differ from
	 * cache to cache; presumably this staggers reaping -- confirm against
	 * the code that consumes next_reap.
	 */
	cachep->node[numa_mem_id()]->next_reap =
			jiffies + REAPTIMEOUT_NODE +
			((unsigned long)cachep) % REAPTIMEOUT_NODE;

	/*
	 * Boot-time settings for the array returned by cpu_cache_get():
	 * empty, limit BOOT_CPUCACHE_ENTRIES, batchcount 1.  The cache-level
	 * batchcount/limit are set to the same values.
	 */
	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

/*
 * kmem_cache_flags - hook for adjusting the flags of a cache being created.
 * @object_size: not used by this implementation
 * @flags: requested SLAB flags
 * @name: not used by this implementation
 *
 * This implementation returns @flags unchanged.
 */
slab_flags_t kmem_cache_flags(unsigned int object_size,
	slab_flags_t flags, const char *name)
{
	return flags;
}

/*
 * __kmem_cache_alias - reuse an existing cache instead of creating a new one.
 * @name, @size, @align, @flags, @ctor: properties of the requested cache,
 *	handed to find_mergeable().
 *
 * If find_mergeable() returns a cache, its refcount is incremented and its
 * object_size is raised to at least @size.
 *
 * Returns whatever find_mergeable() returned (a NULL result is passed
 * through untouched).
 */
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache *cachep;

	cachep = find_mergeable(size, align, flags, name, ctor);
	if (cachep) {
		cachep->refcount++;

		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */
		cachep->object_size = max_t(int, cachep->object_size, size);
	}
	return cachep;
}

/*
 * Try to lay out @cachep with the freelist stored inside one of the slab's
 * own objects (CFLGS_OBJFREELIST_SLAB).
 *
 * cachep->num is reset to 0 and then filled in by calculate_slab_order().
 * On success cachep->colour is derived from the leftover space and true is
 * returned.  Returns false when init-on-free is wanted, when the cache has a
 * constructor or is SLAB_TYPESAFE_BY_RCU, when no object fits, or when the
 * freelist would be larger than one object.
 */
static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;

	/*
	 * If slab auto-initialization on free is enabled, store the freelist
	 * off-slab, so that its contents don't end up in one of the allocated
	 * objects.
	 */
	if (unlikely(slab_want_init_on_free(cachep)))
		return false;

	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
		return false;

	left = calculate_slab_order(cachep, size,
			flags | CFLGS_OBJFREELIST_SLAB);
	if (!cachep->num)
		return false;

	/* The whole freelist has to fit into a single object. */
	if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}

/*
 * Try to lay out @cachep with the freelist kept off-slab (CFLGS_OFF_SLAB).
 *
 * cachep->num is reset to 0 and then filled in by calculate_slab_order().
 * On success cachep->colour is derived from the leftover space and true is
 * returned.  Returns false for SLAB_NOLEAKTRACE caches, when no object
 * fits, or when the leftover space is large enough to hold the freelist
 * on-slab anyway.
 */
static bool set_off_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;

	/*
	 * Always use on-slab management when SLAB_NOLEAKTRACE
	 * to avoid recursive calls into kmemleak.
	 */
	if (flags & SLAB_NOLEAKTRACE)
		return false;

	/*
	 * Size is large, assume best to place the slab management obj
	 * off-slab (should allow better packing of objs).
	 */
	left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
	if (!cachep->num)
		return false;

	/*
	 * If the slab has been placed off-slab, and we have enough space then
	 * move it on-slab. This is at the expense of any extra colouring.
	 */
	if (left >= cachep->num * sizeof(freelist_idx_t))
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}

/*
 * Try to lay out @cachep with @flags passed to calculate_slab_order()
 * unmodified (no extra CFLGS_* layout flag is added here).
 *
 * Returns true and sets cachep->colour when calculate_slab_order() fits at
 * least one object, false otherwise.
 */
static bool set_on_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;

	left = calculate_slab_order(cachep, size, flags);
	if (!cachep->num)
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}

/*
 * __kmem_cache_create - Create a cache.
 * @cachep: cache management descriptor
 * @flags: SLAB flags
 *
 * Returns zero on success, nonzero on failure.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Slab layouts are tried in this order: freelist inside an object, freelist
 * off-slab, freelist on-slab (DEBUG builds may first try a page-aligned
 * off-slab layout for debug pagealloc).  -E2BIG is returned when none of
 * them fits.  If setup_cpu_cache() fails, the cache is released with
 * __kmem_cache_release() and that error is returned.
 */
int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
	size_t ralign = BYTES_PER_WORD;
	gfp_t gfp;
	int err;
	unsigned int size = cachep->size;

#if DEBUG
#if FORCED_DEBUG
	/*
	 * Enable redzoning and last user accounting, except for caches with
	 * large objects, if the increased size would increase the object size
	 * above the next power of two: caches with object sizes just above a
	 * power of two have a significant amount of internal fragmentation.
	 */
	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
						2 * sizeof(unsigned long long)))
		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
	if (!(flags & SLAB_TYPESAFE_BY_RCU))
		flags |= SLAB_POISON;
#endif
#endif

	/*
	 * Check that size is in terms of words.  This is needed to avoid
	 * unaligned accesses for some archs when redzoning is used, and makes
	 * sure any on-slab bufctl's are also correctly aligned.
	 */
	size = ALIGN(size, BYTES_PER_WORD);

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* If redzoning, ensure that the second redzone is suitably
		 * aligned, by adjusting the object size accordingly. */
		size = ALIGN(size, REDZONE_ALIGN);
	}

	/* 3) caller mandated alignment */
	if (ralign < cachep->align) {
		ralign = cachep->align;
	}
	/* disable debug if necessary */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/*
	 * 4) Store it.
	 */
	cachep->align = ralign;
	cachep->colour_off = cache_line_size();
	/* Offset must be a multiple of the alignment. */
	if (cachep->colour_off < cachep->align)
		cachep->colour_off = cachep->align;

	/* GFP_NOWAIT is used until slab_is_available() reports true. */
	if (slab_is_available())
		gfp = GFP_KERNEL;
	else
		gfp = GFP_NOWAIT;

#if DEBUG

	/*
	 * Both debugging options require word-alignment which is calculated
	 * into align above.
	 */
	if (flags & SLAB_RED_ZONE) {
		/* add space for red zone words */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
	}
	if (flags & SLAB_STORE_USER) {
		/* user store requires one word storage behind the end of
		 * the real object. But if the second red zone needs to be
		 * aligned to 64 bits, we must allow that much space.
		 */
		if (flags & SLAB_RED_ZONE)
			size += REDZONE_ALIGN;
		else
			size += BYTES_PER_WORD;
	}
#endif

	/* kasan_cache_create() receives pointers, so it may adjust size and flags. */
	kasan_cache_create(cachep, &size, &flags);

	size = ALIGN(size, cachep->align);
	/*
	 * We should restrict the number of objects in a slab to implement
	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
	 */
	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);

#if DEBUG
	/*
	 * To activate debug pagealloc, off-slab management is necessary
	 * requirement. In early phase of initialization, small sized slab
	 * doesn't get initialized so it would not be possible. So, we need
	 * to check size >= 256. It guarantees that all necessary small
	 * sized slab is initialized in current slab initialization sequence.
	 */
	if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
		size >= 256 && cachep->object_size > cache_line_size()) {
		if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
			size_t tmp_size = ALIGN(size, PAGE_SIZE);

			if (set_off_slab_cache(cachep, tmp_size, flags)) {
				flags |= CFLGS_OFF_SLAB;
				/*
				 * The object grows to a whole number of pages;
				 * the added padding is accounted for in
				 * obj_offset.
				 */
				cachep->obj_offset += tmp_size - size;
				size = tmp_size;
				goto done;
			}
		}
	}
#endif

	/* Try the layouts in order; the first that fits wins. */
	if (set_objfreelist_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OBJFREELIST_SLAB;
		goto done;
	}

	if (set_off_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OFF_SLAB;
		goto done;
	}

	if (set_on_slab_cache(cachep, size, flags))
		goto done;

	return -E2BIG;

done:
	/* Commit the chosen layout and derived allocation flags. */
	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
	cachep->flags = flags;
	cachep->allocflags = __GFP_COMP;
	if (flags & SLAB_CACHE_DMA)
		cachep->allocflags |= GFP_DMA;
	if (flags & SLAB_CACHE_DMA32)
		cachep->allocflags |= GFP_DMA32;
	if (flags & SLAB_RECLAIM_ACCOUNT)
		cachep->allocflags |= __GFP_RECLAIMABLE;
	cachep->size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);

#if DEBUG
	/*
	 * If we're going to use the generic kernel_map_pages()
	 * poisoning, then it's going to smash the contents of
	 * the redzone and userword anyhow, so switch them off.
	 */
	if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
		(cachep->flags & SLAB_POISON) &&
		is_debug_pagealloc_cache(cachep))
		cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif

	err = setup_cpu_cache(cachep, gfp);
	if (err) {
		__kmem_cache_release(cachep);
		return err;
	}

	return 0;
}

#if DEBUG
/* Debug assertion: BUG unless interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* Debug assertion: BUG if interrupts are disabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/* Debug assertion: BUG unless slab_mutex is locked. */
static void check_mutex_acquired(void)
{
	BUG_ON(!mutex_is_locked(&slab_mutex));
}

/*
 * Debug assertion (CONFIG_SMP only): interrupts are off and the list_lock
 * of the node given by numa_mem_id() is held.
 */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}

/*
 * Debug assertion (CONFIG_SMP only): interrupts are off and the list_lock
 * of @node is held.
 */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}

#else
/* Without DEBUG the checks expand to empty statements. */
#define check_irq_off()	do { } while(0)
#define check_irq_on()	do { } while(0)
#define check_mutex_acquired()	do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif

/*
 * Hand objects from the front of @ac to free_block() and compact the array.
 *
 * With @free_all every cached object is released.  Otherwise the target is
 * a fifth of ac->limit (rounded up); if fewer objects than that are cached,
 * half of them (rounded up) are released instead.  The remaining entries are
 * moved to the start of ac->entry.  A NULL or empty @ac is a no-op.
 *
 * The caller visible in this file (drain_cpu_caches()) holds the node's
 * list_lock around the call.
 */
static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
				int node, bool free_all, struct list_head *list)
{
	int tofree;

	if (!ac || !ac->avail)
		return;

	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
	if (tofree > ac->avail)
		tofree = (ac->avail + 1) / 2;

	free_block(cachep, ac->entry, tofree, node, list);
	ac->avail -= tofree;
	/* Source and destination overlap, hence memmove(). */
	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
}

/*
 * Callback run through on_each_cpu() by drain_cpu_caches(); @arg is the
 * cache.  Releases every object in the array returned by cpu_cache_get()
 * under the list_lock of the node given by numa_mem_id(), then destroys
 * the slabs that free_block() collected on the local list.
 */
static void do_drain(void *arg)
{
	struct kmem_cache *cachep = arg;
	struct array_cache *ac;
	int node = numa_mem_id();
	struct kmem_cache_node *n;
	LIST_HEAD(list);

	check_irq_off();
	ac = cpu_cache_get(cachep);
	n = get_node(cachep, node);
	raw_spin_lock(&n->list_lock);
	free_block(cachep, ac->entry, ac->avail, node, &list);
	raw_spin_unlock(&n->list_lock);
	ac->avail = 0;
	/* Slab destruction happens after list_lock has been dropped. */
	slabs_destroy(cachep, &list);
}

/*
 * Drain the per-cpu arrays via do_drain() on every CPU, then each node's
 * alien caches (where present), then each node's shared array.
 */
static void drain_cpu_caches(struct kmem_cache *cachep)
{
	struct kmem_cache_node *n;
	int node;
	LIST_HEAD(list);

	/* Last argument 1: wait until do_drain() has finished everywhere. */
	on_each_cpu(do_drain, cachep, 1);
	check_irq_on();
	for_each_kmem_cache_node(cachep, node, n)
		if (n->alien)
			drain_alien_cache(cachep, n->alien);

	for_each_kmem_cache_node(cachep, node, n) {
		raw_spin_lock_irq(&n->list_lock);
		drain_array_locked(cachep, n->shared, node, true, &list);
		raw_spin_unlock_irq(&n->list_lock);

		slabs_destroy(cachep, &list);
	}
}

/*
 * Remove slabs from the list of free slabs.
 * Specify the number of slabs to drain in tofree.
 *
 * Returns the actual number of slabs released.
 */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_cache_node *n, int tofree)
{
	struct list_head *p;
	int nr_freed;
	struct slab *slab;

	nr_freed = 0;
	/*
	 * The loop condition looks at slabs_free without the lock, so
	 * emptiness is checked again below once list_lock is held.
	 */
	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {

		raw_spin_lock_irq(&n->list_lock);
		p = n->slabs_free.prev;
		if (p == &n->slabs_free) {
			raw_spin_unlock_irq(&n->list_lock);
			goto out;
		}

		slab = list_entry(p, struct slab, slab_list);
		list_del(&slab->slab_list);
		n->free_slabs--;
		n->total_slabs--;
		/*
		 * Safe to drop the lock. The slab is no longer linked
		 * to the cache.
		 */
		n->free_objects -= cache->num;
		raw_spin_unlock_irq(&n->list_lock);
		slab_destroy(cache, slab);
		nr_freed++;

		cond_resched();
	}
out:
	return nr_freed;
}

/*
 * Returns true when no node of @s has a slab on its full or partial list.
 */
bool __kmem_cache_empty(struct kmem_cache *s)
{
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n)
		if (!list_empty(&n->slabs_full) ||
		    !list_empty(&n->slabs_partial))
			return false;
	return true;
}

/*
 * Drain the cpu caches of @cachep and release all free slabs on every node.
 *
 * Returns 1 if any node still has slabs on its full or partial list
 * afterwards, 0 otherwise.
 */
int __kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret = 0;
	int node;
	struct kmem_cache_node *n;

	drain_cpu_caches(cachep);

	check_irq_on();
	for_each_kmem_cache_node(cachep, node, n) {
		drain_freelist(cachep, n, INT_MAX);

		ret += !list_empty(&n->slabs_full) ||
			!list_empty(&n->slabs_partial);
	}
	return (ret ? 1 : 0);
}

/* Shutdown performs the same work as, and returns the result of, a shrink. */
int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
	return __kmem_cache_shrink(cachep);
}

/*
 * Free the auxiliary structures of @cachep: the random sequence (via
 * cache_random_seq_destroy()), the per-cpu arrays, and for every node the
 * shared array, the alien caches and the node structure itself.  Each
 * cachep->node[] slot is cleared after its node has been freed.
 */
void __kmem_cache_release(struct kmem_cache *cachep)
{
	int i;
	struct kmem_cache_node *n;

	cache_random_seq_destroy(cachep);

	free_percpu(cachep->cpu_cache);

	/* NUMA: free the node structures */
	for_each_kmem_cache_node(cachep, i, n) {
		kfree(n->shared);
		free_alien_cache(n->alien);
		kfree(n);
		cachep->node[i] = NULL;
	}
}

/*
 * Get the memory for a slab management obj.
 *
 * For a slab cache when the slab descriptor is off-slab, the
 * slab descriptor can't come from the same cache which is being created,
 * Because if it is the case, that means we defer the creation of
 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
 * And we eventually call down to __kmem_cache_create(), which
 * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
 * This is a "chicken-and-egg" problem.
226962306a36Sopenharmony_ci * 227062306a36Sopenharmony_ci * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches, 227162306a36Sopenharmony_ci * which are all initialized during kmem_cache_init(). 227262306a36Sopenharmony_ci */ 227362306a36Sopenharmony_cistatic void *alloc_slabmgmt(struct kmem_cache *cachep, 227462306a36Sopenharmony_ci struct slab *slab, int colour_off, 227562306a36Sopenharmony_ci gfp_t local_flags, int nodeid) 227662306a36Sopenharmony_ci{ 227762306a36Sopenharmony_ci void *freelist; 227862306a36Sopenharmony_ci void *addr = slab_address(slab); 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci slab->s_mem = addr + colour_off; 228162306a36Sopenharmony_ci slab->active = 0; 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_ci if (OBJFREELIST_SLAB(cachep)) 228462306a36Sopenharmony_ci freelist = NULL; 228562306a36Sopenharmony_ci else if (OFF_SLAB(cachep)) { 228662306a36Sopenharmony_ci /* Slab management obj is off-slab. */ 228762306a36Sopenharmony_ci freelist = kmalloc_node(cachep->freelist_size, 228862306a36Sopenharmony_ci local_flags, nodeid); 228962306a36Sopenharmony_ci } else { 229062306a36Sopenharmony_ci /* We will use last bytes at the slab for freelist */ 229162306a36Sopenharmony_ci freelist = addr + (PAGE_SIZE << cachep->gfporder) - 229262306a36Sopenharmony_ci cachep->freelist_size; 229362306a36Sopenharmony_ci } 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci return freelist; 229662306a36Sopenharmony_ci} 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_cistatic inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx) 229962306a36Sopenharmony_ci{ 230062306a36Sopenharmony_ci return ((freelist_idx_t *) slab->freelist)[idx]; 230162306a36Sopenharmony_ci} 230262306a36Sopenharmony_ci 230362306a36Sopenharmony_cistatic inline void set_free_obj(struct slab *slab, 230462306a36Sopenharmony_ci unsigned int idx, freelist_idx_t val) 230562306a36Sopenharmony_ci{ 230662306a36Sopenharmony_ci ((freelist_idx_t 
*)(slab->freelist))[idx] = val; 230762306a36Sopenharmony_ci} 230862306a36Sopenharmony_ci 230962306a36Sopenharmony_cistatic void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab) 231062306a36Sopenharmony_ci{ 231162306a36Sopenharmony_ci#if DEBUG 231262306a36Sopenharmony_ci int i; 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci for (i = 0; i < cachep->num; i++) { 231562306a36Sopenharmony_ci void *objp = index_to_obj(cachep, slab, i); 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_ci if (cachep->flags & SLAB_STORE_USER) 231862306a36Sopenharmony_ci *dbg_userword(cachep, objp) = NULL; 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 232162306a36Sopenharmony_ci *dbg_redzone1(cachep, objp) = RED_INACTIVE; 232262306a36Sopenharmony_ci *dbg_redzone2(cachep, objp) = RED_INACTIVE; 232362306a36Sopenharmony_ci } 232462306a36Sopenharmony_ci /* 232562306a36Sopenharmony_ci * Constructors are not allowed to allocate memory from the same 232662306a36Sopenharmony_ci * cache which they are a constructor for. Otherwise, deadlock. 232762306a36Sopenharmony_ci * They must also be threaded. 
232862306a36Sopenharmony_ci */ 232962306a36Sopenharmony_ci if (cachep->ctor && !(cachep->flags & SLAB_POISON)) { 233062306a36Sopenharmony_ci kasan_unpoison_object_data(cachep, 233162306a36Sopenharmony_ci objp + obj_offset(cachep)); 233262306a36Sopenharmony_ci cachep->ctor(objp + obj_offset(cachep)); 233362306a36Sopenharmony_ci kasan_poison_object_data( 233462306a36Sopenharmony_ci cachep, objp + obj_offset(cachep)); 233562306a36Sopenharmony_ci } 233662306a36Sopenharmony_ci 233762306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 233862306a36Sopenharmony_ci if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 233962306a36Sopenharmony_ci slab_error(cachep, "constructor overwrote the end of an object"); 234062306a36Sopenharmony_ci if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 234162306a36Sopenharmony_ci slab_error(cachep, "constructor overwrote the start of an object"); 234262306a36Sopenharmony_ci } 234362306a36Sopenharmony_ci /* need to poison the objs? */ 234462306a36Sopenharmony_ci if (cachep->flags & SLAB_POISON) { 234562306a36Sopenharmony_ci poison_obj(cachep, objp, POISON_FREE); 234662306a36Sopenharmony_ci slab_kernel_map(cachep, objp, 0); 234762306a36Sopenharmony_ci } 234862306a36Sopenharmony_ci } 234962306a36Sopenharmony_ci#endif 235062306a36Sopenharmony_ci} 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci#ifdef CONFIG_SLAB_FREELIST_RANDOM 235362306a36Sopenharmony_ci/* Hold information during a freelist initialization */ 235462306a36Sopenharmony_cistruct freelist_init_state { 235562306a36Sopenharmony_ci unsigned int pos; 235662306a36Sopenharmony_ci unsigned int *list; 235762306a36Sopenharmony_ci unsigned int count; 235862306a36Sopenharmony_ci}; 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ci/* 236162306a36Sopenharmony_ci * Initialize the state based on the randomization method available. 236262306a36Sopenharmony_ci * return true if the pre-computed list is available, false otherwise. 
236362306a36Sopenharmony_ci */ 236462306a36Sopenharmony_cistatic bool freelist_state_initialize(struct freelist_init_state *state, 236562306a36Sopenharmony_ci struct kmem_cache *cachep, 236662306a36Sopenharmony_ci unsigned int count) 236762306a36Sopenharmony_ci{ 236862306a36Sopenharmony_ci bool ret; 236962306a36Sopenharmony_ci if (!cachep->random_seq) { 237062306a36Sopenharmony_ci ret = false; 237162306a36Sopenharmony_ci } else { 237262306a36Sopenharmony_ci state->list = cachep->random_seq; 237362306a36Sopenharmony_ci state->count = count; 237462306a36Sopenharmony_ci state->pos = get_random_u32_below(count); 237562306a36Sopenharmony_ci ret = true; 237662306a36Sopenharmony_ci } 237762306a36Sopenharmony_ci return ret; 237862306a36Sopenharmony_ci} 237962306a36Sopenharmony_ci 238062306a36Sopenharmony_ci/* Get the next entry on the list and randomize it using a random shift */ 238162306a36Sopenharmony_cistatic freelist_idx_t next_random_slot(struct freelist_init_state *state) 238262306a36Sopenharmony_ci{ 238362306a36Sopenharmony_ci if (state->pos >= state->count) 238462306a36Sopenharmony_ci state->pos = 0; 238562306a36Sopenharmony_ci return state->list[state->pos++]; 238662306a36Sopenharmony_ci} 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci/* Swap two freelist entries */ 238962306a36Sopenharmony_cistatic void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b) 239062306a36Sopenharmony_ci{ 239162306a36Sopenharmony_ci swap(((freelist_idx_t *) slab->freelist)[a], 239262306a36Sopenharmony_ci ((freelist_idx_t *) slab->freelist)[b]); 239362306a36Sopenharmony_ci} 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci/* 239662306a36Sopenharmony_ci * Shuffle the freelist initialization state based on pre-computed lists. 239762306a36Sopenharmony_ci * return true if the list was successfully shuffled, false otherwise. 
 *
 * Returns false without touching the freelist when the slab holds fewer
 * than two objects.  For an OBJFREELIST_SLAB cache one object is picked to
 * host the freelist; it is kept out of the shuffled range and its index is
 * written to the last freelist entry.
 */
static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
	unsigned int objfreelist = 0, i, rand, count = cachep->num;
	struct freelist_init_state state;
	bool precomputed;

	if (count < 2)
		return false;

	precomputed = freelist_state_initialize(&state, cachep, count);

	/* Take a random entry as the objfreelist */
	if (OBJFREELIST_SLAB(cachep)) {
		if (!precomputed)
			objfreelist = count - 1;
		else
			objfreelist = next_random_slot(&state);
		slab->freelist = index_to_obj(cachep, slab, objfreelist) +
						obj_offset(cachep);
		/* The hosting object is excluded from the entries filled below. */
		count--;
	}

	/*
	 * On early boot, generate the list dynamically.
	 * Later use a pre-computed list for speed.
	 */
	if (!precomputed) {
		for (i = 0; i < count; i++)
			set_free_obj(slab, i, i);

		/* Fisher-Yates shuffle */
		for (i = count - 1; i > 0; i--) {
			rand = get_random_u32_below(i + 1);
			swap_free_obj(slab, i, rand);
		}
	} else {
		for (i = 0; i < count; i++)
			set_free_obj(slab, i, next_random_slot(&state));
	}

	/* The object hosting the freelist goes into the last entry. */
	if (OBJFREELIST_SLAB(cachep))
		set_free_obj(slab, cachep->num - 1, objfreelist);

	return true;
}
#else
/* Without CONFIG_SLAB_FREELIST_RANDOM nothing is ever shuffled. */
static inline bool shuffle_freelist(struct kmem_cache *cachep,
				struct slab *slab)
{
	return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

/*
 * Initialise the objects and the freelist of a new slab.
 *
 * Runs the debug initialisation, tries to shuffle the freelist, and when
 * that did not happen fills the freelist in index order (for an
 * OBJFREELIST_SLAB cache the last object hosts the freelist in that case).
 * In non-DEBUG builds the constructor, if any, is called on every object
 * here.
 */
static void cache_init_objs(struct kmem_cache *cachep,
			    struct slab *slab)
{
	int i;
	void *objp;
	bool shuffled;

	cache_init_objs_debug(cachep, slab);

	/* Try to randomize the freelist if enabled */
	shuffled = shuffle_freelist(cachep, slab);

	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
		slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
						obj_offset(cachep);
	}

	for (i = 0; i < cachep->num; i++) {
		objp = index_to_obj(cachep, slab, i);
		objp = kasan_init_slab_obj(cachep, objp);

		/* constructor could break poison info */
		if (DEBUG == 0 && cachep->ctor) {
			kasan_unpoison_object_data(cachep, objp);
			cachep->ctor(objp);
			kasan_poison_object_data(cachep, objp);
		}

		if (!shuffled)
			set_free_obj(slab, i, i);
	}
}

/*
 * Take the next free object of @slab: the freelist entry at slab->active
 * names the object, and slab->active is then incremented.
 */
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
{
	void *objp;

	objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
	slab->active++;

	return objp;
}

static void slab_put_obj(struct kmem_cache *cachep,
			struct slab *slab, void *objp)
{
	unsigned int objnr = obj_to_index(cachep, slab, objp);
#if DEBUG
	unsigned int i;

	/* Verify double free bug */
	for (i = slab->active; i < cachep->num; i++) {
		if (get_free_obj(slab, i) == objnr) {
250562306a36Sopenharmony_ci pr_err("slab: double free detected in cache '%s', objp %px\n", 250662306a36Sopenharmony_ci cachep->name, objp); 250762306a36Sopenharmony_ci BUG(); 250862306a36Sopenharmony_ci } 250962306a36Sopenharmony_ci } 251062306a36Sopenharmony_ci#endif 251162306a36Sopenharmony_ci slab->active--; 251262306a36Sopenharmony_ci if (!slab->freelist) 251362306a36Sopenharmony_ci slab->freelist = objp + obj_offset(cachep); 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci set_free_obj(slab, slab->active, objnr); 251662306a36Sopenharmony_ci} 251762306a36Sopenharmony_ci 251862306a36Sopenharmony_ci/* 251962306a36Sopenharmony_ci * Grow (by 1) the number of slabs within a cache. This is called by 252062306a36Sopenharmony_ci * kmem_cache_alloc() when there are no active objs left in a cache. 252162306a36Sopenharmony_ci */ 252262306a36Sopenharmony_cistatic struct slab *cache_grow_begin(struct kmem_cache *cachep, 252362306a36Sopenharmony_ci gfp_t flags, int nodeid) 252462306a36Sopenharmony_ci{ 252562306a36Sopenharmony_ci void *freelist; 252662306a36Sopenharmony_ci size_t offset; 252762306a36Sopenharmony_ci gfp_t local_flags; 252862306a36Sopenharmony_ci int slab_node; 252962306a36Sopenharmony_ci struct kmem_cache_node *n; 253062306a36Sopenharmony_ci struct slab *slab; 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci /* 253362306a36Sopenharmony_ci * Be lazy and only check for valid flags here, keeping it out of the 253462306a36Sopenharmony_ci * critical path in kmem_cache_alloc(). 
253562306a36Sopenharmony_ci */ 253662306a36Sopenharmony_ci if (unlikely(flags & GFP_SLAB_BUG_MASK)) 253762306a36Sopenharmony_ci flags = kmalloc_fix_flags(flags); 253862306a36Sopenharmony_ci 253962306a36Sopenharmony_ci WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); 254062306a36Sopenharmony_ci local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); 254162306a36Sopenharmony_ci 254262306a36Sopenharmony_ci check_irq_off(); 254362306a36Sopenharmony_ci if (gfpflags_allow_blocking(local_flags)) 254462306a36Sopenharmony_ci local_irq_enable(); 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_ci /* 254762306a36Sopenharmony_ci * Get mem for the objs. Attempt to allocate a physical page from 254862306a36Sopenharmony_ci * 'nodeid'. 254962306a36Sopenharmony_ci */ 255062306a36Sopenharmony_ci slab = kmem_getpages(cachep, local_flags, nodeid); 255162306a36Sopenharmony_ci if (!slab) 255262306a36Sopenharmony_ci goto failed; 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci slab_node = slab_nid(slab); 255562306a36Sopenharmony_ci n = get_node(cachep, slab_node); 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci /* Get colour for the slab, and cal the next value. */ 255862306a36Sopenharmony_ci n->colour_next++; 255962306a36Sopenharmony_ci if (n->colour_next >= cachep->colour) 256062306a36Sopenharmony_ci n->colour_next = 0; 256162306a36Sopenharmony_ci 256262306a36Sopenharmony_ci offset = n->colour_next; 256362306a36Sopenharmony_ci if (offset >= cachep->colour) 256462306a36Sopenharmony_ci offset = 0; 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci offset *= cachep->colour_off; 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_ci /* 256962306a36Sopenharmony_ci * Call kasan_poison_slab() before calling alloc_slabmgmt(), so 257062306a36Sopenharmony_ci * page_address() in the latter returns a non-tagged pointer, 257162306a36Sopenharmony_ci * as it should be for slab pages. 
257262306a36Sopenharmony_ci */ 257362306a36Sopenharmony_ci kasan_poison_slab(slab); 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_ci /* Get slab management. */ 257662306a36Sopenharmony_ci freelist = alloc_slabmgmt(cachep, slab, offset, 257762306a36Sopenharmony_ci local_flags & ~GFP_CONSTRAINT_MASK, slab_node); 257862306a36Sopenharmony_ci if (OFF_SLAB(cachep) && !freelist) 257962306a36Sopenharmony_ci goto opps1; 258062306a36Sopenharmony_ci 258162306a36Sopenharmony_ci slab->slab_cache = cachep; 258262306a36Sopenharmony_ci slab->freelist = freelist; 258362306a36Sopenharmony_ci 258462306a36Sopenharmony_ci cache_init_objs(cachep, slab); 258562306a36Sopenharmony_ci 258662306a36Sopenharmony_ci if (gfpflags_allow_blocking(local_flags)) 258762306a36Sopenharmony_ci local_irq_disable(); 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_ci return slab; 259062306a36Sopenharmony_ci 259162306a36Sopenharmony_ciopps1: 259262306a36Sopenharmony_ci kmem_freepages(cachep, slab); 259362306a36Sopenharmony_cifailed: 259462306a36Sopenharmony_ci if (gfpflags_allow_blocking(local_flags)) 259562306a36Sopenharmony_ci local_irq_disable(); 259662306a36Sopenharmony_ci return NULL; 259762306a36Sopenharmony_ci} 259862306a36Sopenharmony_ci 259962306a36Sopenharmony_cistatic void cache_grow_end(struct kmem_cache *cachep, struct slab *slab) 260062306a36Sopenharmony_ci{ 260162306a36Sopenharmony_ci struct kmem_cache_node *n; 260262306a36Sopenharmony_ci void *list = NULL; 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci check_irq_off(); 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci if (!slab) 260762306a36Sopenharmony_ci return; 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci INIT_LIST_HEAD(&slab->slab_list); 261062306a36Sopenharmony_ci n = get_node(cachep, slab_nid(slab)); 261162306a36Sopenharmony_ci 261262306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 261362306a36Sopenharmony_ci n->total_slabs++; 261462306a36Sopenharmony_ci if (!slab->active) { 
261562306a36Sopenharmony_ci list_add_tail(&slab->slab_list, &n->slabs_free); 261662306a36Sopenharmony_ci n->free_slabs++; 261762306a36Sopenharmony_ci } else 261862306a36Sopenharmony_ci fixup_slab_list(cachep, n, slab, &list); 261962306a36Sopenharmony_ci 262062306a36Sopenharmony_ci STATS_INC_GROWN(cachep); 262162306a36Sopenharmony_ci n->free_objects += cachep->num - slab->active; 262262306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 262362306a36Sopenharmony_ci 262462306a36Sopenharmony_ci fixup_objfreelist_debug(cachep, &list); 262562306a36Sopenharmony_ci} 262662306a36Sopenharmony_ci 262762306a36Sopenharmony_ci#if DEBUG 262862306a36Sopenharmony_ci 262962306a36Sopenharmony_ci/* 263062306a36Sopenharmony_ci * Perform extra freeing checks: 263162306a36Sopenharmony_ci * - detect bad pointers. 263262306a36Sopenharmony_ci * - POISON/RED_ZONE checking 263362306a36Sopenharmony_ci */ 263462306a36Sopenharmony_cistatic void kfree_debugcheck(const void *objp) 263562306a36Sopenharmony_ci{ 263662306a36Sopenharmony_ci if (!virt_addr_valid(objp)) { 263762306a36Sopenharmony_ci pr_err("kfree_debugcheck: out of range ptr %lxh\n", 263862306a36Sopenharmony_ci (unsigned long)objp); 263962306a36Sopenharmony_ci BUG(); 264062306a36Sopenharmony_ci } 264162306a36Sopenharmony_ci} 264262306a36Sopenharmony_ci 264362306a36Sopenharmony_cistatic inline void verify_redzone_free(struct kmem_cache *cache, void *obj) 264462306a36Sopenharmony_ci{ 264562306a36Sopenharmony_ci unsigned long long redzone1, redzone2; 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_ci redzone1 = *dbg_redzone1(cache, obj); 264862306a36Sopenharmony_ci redzone2 = *dbg_redzone2(cache, obj); 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_ci /* 265162306a36Sopenharmony_ci * Redzone is ok. 
265262306a36Sopenharmony_ci */ 265362306a36Sopenharmony_ci if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE) 265462306a36Sopenharmony_ci return; 265562306a36Sopenharmony_ci 265662306a36Sopenharmony_ci if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE) 265762306a36Sopenharmony_ci slab_error(cache, "double free detected"); 265862306a36Sopenharmony_ci else 265962306a36Sopenharmony_ci slab_error(cache, "memory outside object was overwritten"); 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n", 266262306a36Sopenharmony_ci obj, redzone1, redzone2); 266362306a36Sopenharmony_ci} 266462306a36Sopenharmony_ci 266562306a36Sopenharmony_cistatic void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, 266662306a36Sopenharmony_ci unsigned long caller) 266762306a36Sopenharmony_ci{ 266862306a36Sopenharmony_ci unsigned int objnr; 266962306a36Sopenharmony_ci struct slab *slab; 267062306a36Sopenharmony_ci 267162306a36Sopenharmony_ci BUG_ON(virt_to_cache(objp) != cachep); 267262306a36Sopenharmony_ci 267362306a36Sopenharmony_ci objp -= obj_offset(cachep); 267462306a36Sopenharmony_ci kfree_debugcheck(objp); 267562306a36Sopenharmony_ci slab = virt_to_slab(objp); 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 267862306a36Sopenharmony_ci verify_redzone_free(cachep, objp); 267962306a36Sopenharmony_ci *dbg_redzone1(cachep, objp) = RED_INACTIVE; 268062306a36Sopenharmony_ci *dbg_redzone2(cachep, objp) = RED_INACTIVE; 268162306a36Sopenharmony_ci } 268262306a36Sopenharmony_ci if (cachep->flags & SLAB_STORE_USER) 268362306a36Sopenharmony_ci *dbg_userword(cachep, objp) = (void *)caller; 268462306a36Sopenharmony_ci 268562306a36Sopenharmony_ci objnr = obj_to_index(cachep, slab, objp); 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_ci BUG_ON(objnr >= cachep->num); 268862306a36Sopenharmony_ci BUG_ON(objp != index_to_obj(cachep, slab, objnr)); 268962306a36Sopenharmony_ci 
269062306a36Sopenharmony_ci if (cachep->flags & SLAB_POISON) { 269162306a36Sopenharmony_ci poison_obj(cachep, objp, POISON_FREE); 269262306a36Sopenharmony_ci slab_kernel_map(cachep, objp, 0); 269362306a36Sopenharmony_ci } 269462306a36Sopenharmony_ci return objp; 269562306a36Sopenharmony_ci} 269662306a36Sopenharmony_ci 269762306a36Sopenharmony_ci#else 269862306a36Sopenharmony_ci#define kfree_debugcheck(x) do { } while(0) 269962306a36Sopenharmony_ci#define cache_free_debugcheck(x, objp, z) (objp) 270062306a36Sopenharmony_ci#endif 270162306a36Sopenharmony_ci 270262306a36Sopenharmony_cistatic inline void fixup_objfreelist_debug(struct kmem_cache *cachep, 270362306a36Sopenharmony_ci void **list) 270462306a36Sopenharmony_ci{ 270562306a36Sopenharmony_ci#if DEBUG 270662306a36Sopenharmony_ci void *next = *list; 270762306a36Sopenharmony_ci void *objp; 270862306a36Sopenharmony_ci 270962306a36Sopenharmony_ci while (next) { 271062306a36Sopenharmony_ci objp = next - obj_offset(cachep); 271162306a36Sopenharmony_ci next = *(void **)next; 271262306a36Sopenharmony_ci poison_obj(cachep, objp, POISON_FREE); 271362306a36Sopenharmony_ci } 271462306a36Sopenharmony_ci#endif 271562306a36Sopenharmony_ci} 271662306a36Sopenharmony_ci 271762306a36Sopenharmony_cistatic inline void fixup_slab_list(struct kmem_cache *cachep, 271862306a36Sopenharmony_ci struct kmem_cache_node *n, struct slab *slab, 271962306a36Sopenharmony_ci void **list) 272062306a36Sopenharmony_ci{ 272162306a36Sopenharmony_ci /* move slabp to correct slabp list: */ 272262306a36Sopenharmony_ci list_del(&slab->slab_list); 272362306a36Sopenharmony_ci if (slab->active == cachep->num) { 272462306a36Sopenharmony_ci list_add(&slab->slab_list, &n->slabs_full); 272562306a36Sopenharmony_ci if (OBJFREELIST_SLAB(cachep)) { 272662306a36Sopenharmony_ci#if DEBUG 272762306a36Sopenharmony_ci /* Poisoning will be done without holding the lock */ 272862306a36Sopenharmony_ci if (cachep->flags & SLAB_POISON) { 272962306a36Sopenharmony_ci void **objp 
= slab->freelist; 273062306a36Sopenharmony_ci 273162306a36Sopenharmony_ci *objp = *list; 273262306a36Sopenharmony_ci *list = objp; 273362306a36Sopenharmony_ci } 273462306a36Sopenharmony_ci#endif 273562306a36Sopenharmony_ci slab->freelist = NULL; 273662306a36Sopenharmony_ci } 273762306a36Sopenharmony_ci } else 273862306a36Sopenharmony_ci list_add(&slab->slab_list, &n->slabs_partial); 273962306a36Sopenharmony_ci} 274062306a36Sopenharmony_ci 274162306a36Sopenharmony_ci/* Try to find non-pfmemalloc slab if needed */ 274262306a36Sopenharmony_cistatic noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n, 274362306a36Sopenharmony_ci struct slab *slab, bool pfmemalloc) 274462306a36Sopenharmony_ci{ 274562306a36Sopenharmony_ci if (!slab) 274662306a36Sopenharmony_ci return NULL; 274762306a36Sopenharmony_ci 274862306a36Sopenharmony_ci if (pfmemalloc) 274962306a36Sopenharmony_ci return slab; 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci if (!slab_test_pfmemalloc(slab)) 275262306a36Sopenharmony_ci return slab; 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ci /* No need to keep pfmemalloc slab if we have enough free objects */ 275562306a36Sopenharmony_ci if (n->free_objects > n->free_limit) { 275662306a36Sopenharmony_ci slab_clear_pfmemalloc(slab); 275762306a36Sopenharmony_ci return slab; 275862306a36Sopenharmony_ci } 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_ci /* Move pfmemalloc slab to the end of list to speed up next search */ 276162306a36Sopenharmony_ci list_del(&slab->slab_list); 276262306a36Sopenharmony_ci if (!slab->active) { 276362306a36Sopenharmony_ci list_add_tail(&slab->slab_list, &n->slabs_free); 276462306a36Sopenharmony_ci n->free_slabs++; 276562306a36Sopenharmony_ci } else 276662306a36Sopenharmony_ci list_add_tail(&slab->slab_list, &n->slabs_partial); 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci list_for_each_entry(slab, &n->slabs_partial, slab_list) { 276962306a36Sopenharmony_ci if 
(!slab_test_pfmemalloc(slab)) 277062306a36Sopenharmony_ci return slab; 277162306a36Sopenharmony_ci } 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_ci n->free_touched = 1; 277462306a36Sopenharmony_ci list_for_each_entry(slab, &n->slabs_free, slab_list) { 277562306a36Sopenharmony_ci if (!slab_test_pfmemalloc(slab)) { 277662306a36Sopenharmony_ci n->free_slabs--; 277762306a36Sopenharmony_ci return slab; 277862306a36Sopenharmony_ci } 277962306a36Sopenharmony_ci } 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci return NULL; 278262306a36Sopenharmony_ci} 278362306a36Sopenharmony_ci 278462306a36Sopenharmony_cistatic struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) 278562306a36Sopenharmony_ci{ 278662306a36Sopenharmony_ci struct slab *slab; 278762306a36Sopenharmony_ci 278862306a36Sopenharmony_ci assert_raw_spin_locked(&n->list_lock); 278962306a36Sopenharmony_ci slab = list_first_entry_or_null(&n->slabs_partial, struct slab, 279062306a36Sopenharmony_ci slab_list); 279162306a36Sopenharmony_ci if (!slab) { 279262306a36Sopenharmony_ci n->free_touched = 1; 279362306a36Sopenharmony_ci slab = list_first_entry_or_null(&n->slabs_free, struct slab, 279462306a36Sopenharmony_ci slab_list); 279562306a36Sopenharmony_ci if (slab) 279662306a36Sopenharmony_ci n->free_slabs--; 279762306a36Sopenharmony_ci } 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci if (sk_memalloc_socks()) 280062306a36Sopenharmony_ci slab = get_valid_first_slab(n, slab, pfmemalloc); 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_ci return slab; 280362306a36Sopenharmony_ci} 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_cistatic noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, 280662306a36Sopenharmony_ci struct kmem_cache_node *n, gfp_t flags) 280762306a36Sopenharmony_ci{ 280862306a36Sopenharmony_ci struct slab *slab; 280962306a36Sopenharmony_ci void *obj; 281062306a36Sopenharmony_ci void *list = NULL; 281162306a36Sopenharmony_ci 
281262306a36Sopenharmony_ci if (!gfp_pfmemalloc_allowed(flags)) 281362306a36Sopenharmony_ci return NULL; 281462306a36Sopenharmony_ci 281562306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 281662306a36Sopenharmony_ci slab = get_first_slab(n, true); 281762306a36Sopenharmony_ci if (!slab) { 281862306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 281962306a36Sopenharmony_ci return NULL; 282062306a36Sopenharmony_ci } 282162306a36Sopenharmony_ci 282262306a36Sopenharmony_ci obj = slab_get_obj(cachep, slab); 282362306a36Sopenharmony_ci n->free_objects--; 282462306a36Sopenharmony_ci 282562306a36Sopenharmony_ci fixup_slab_list(cachep, n, slab, &list); 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 282862306a36Sopenharmony_ci fixup_objfreelist_debug(cachep, &list); 282962306a36Sopenharmony_ci 283062306a36Sopenharmony_ci return obj; 283162306a36Sopenharmony_ci} 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci/* 283462306a36Sopenharmony_ci * Slab list should be fixed up by fixup_slab_list() for existing slab 283562306a36Sopenharmony_ci * or cache_grow_end() for new slab 283662306a36Sopenharmony_ci */ 283762306a36Sopenharmony_cistatic __always_inline int alloc_block(struct kmem_cache *cachep, 283862306a36Sopenharmony_ci struct array_cache *ac, struct slab *slab, int batchcount) 283962306a36Sopenharmony_ci{ 284062306a36Sopenharmony_ci /* 284162306a36Sopenharmony_ci * There must be at least one object available for 284262306a36Sopenharmony_ci * allocation. 
284362306a36Sopenharmony_ci */ 284462306a36Sopenharmony_ci BUG_ON(slab->active >= cachep->num); 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ci while (slab->active < cachep->num && batchcount--) { 284762306a36Sopenharmony_ci STATS_INC_ALLOCED(cachep); 284862306a36Sopenharmony_ci STATS_INC_ACTIVE(cachep); 284962306a36Sopenharmony_ci STATS_SET_HIGH(cachep); 285062306a36Sopenharmony_ci 285162306a36Sopenharmony_ci ac->entry[ac->avail++] = slab_get_obj(cachep, slab); 285262306a36Sopenharmony_ci } 285362306a36Sopenharmony_ci 285462306a36Sopenharmony_ci return batchcount; 285562306a36Sopenharmony_ci} 285662306a36Sopenharmony_ci 285762306a36Sopenharmony_cistatic void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) 285862306a36Sopenharmony_ci{ 285962306a36Sopenharmony_ci int batchcount; 286062306a36Sopenharmony_ci struct kmem_cache_node *n; 286162306a36Sopenharmony_ci struct array_cache *ac, *shared; 286262306a36Sopenharmony_ci int node; 286362306a36Sopenharmony_ci void *list = NULL; 286462306a36Sopenharmony_ci struct slab *slab; 286562306a36Sopenharmony_ci 286662306a36Sopenharmony_ci check_irq_off(); 286762306a36Sopenharmony_ci node = numa_mem_id(); 286862306a36Sopenharmony_ci 286962306a36Sopenharmony_ci ac = cpu_cache_get(cachep); 287062306a36Sopenharmony_ci batchcount = ac->batchcount; 287162306a36Sopenharmony_ci if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { 287262306a36Sopenharmony_ci /* 287362306a36Sopenharmony_ci * If there was little recent activity on this cache, then 287462306a36Sopenharmony_ci * perform only a partial refill. Otherwise we could generate 287562306a36Sopenharmony_ci * refill bouncing. 
287662306a36Sopenharmony_ci */ 287762306a36Sopenharmony_ci batchcount = BATCHREFILL_LIMIT; 287862306a36Sopenharmony_ci } 287962306a36Sopenharmony_ci n = get_node(cachep, node); 288062306a36Sopenharmony_ci 288162306a36Sopenharmony_ci BUG_ON(ac->avail > 0 || !n); 288262306a36Sopenharmony_ci shared = READ_ONCE(n->shared); 288362306a36Sopenharmony_ci if (!n->free_objects && (!shared || !shared->avail)) 288462306a36Sopenharmony_ci goto direct_grow; 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 288762306a36Sopenharmony_ci shared = READ_ONCE(n->shared); 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_ci /* See if we can refill from the shared array */ 289062306a36Sopenharmony_ci if (shared && transfer_objects(ac, shared, batchcount)) { 289162306a36Sopenharmony_ci shared->touched = 1; 289262306a36Sopenharmony_ci goto alloc_done; 289362306a36Sopenharmony_ci } 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci while (batchcount > 0) { 289662306a36Sopenharmony_ci /* Get slab alloc is to come from. 
*/ 289762306a36Sopenharmony_ci slab = get_first_slab(n, false); 289862306a36Sopenharmony_ci if (!slab) 289962306a36Sopenharmony_ci goto must_grow; 290062306a36Sopenharmony_ci 290162306a36Sopenharmony_ci check_spinlock_acquired(cachep); 290262306a36Sopenharmony_ci 290362306a36Sopenharmony_ci batchcount = alloc_block(cachep, ac, slab, batchcount); 290462306a36Sopenharmony_ci fixup_slab_list(cachep, n, slab, &list); 290562306a36Sopenharmony_ci } 290662306a36Sopenharmony_ci 290762306a36Sopenharmony_cimust_grow: 290862306a36Sopenharmony_ci n->free_objects -= ac->avail; 290962306a36Sopenharmony_cialloc_done: 291062306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 291162306a36Sopenharmony_ci fixup_objfreelist_debug(cachep, &list); 291262306a36Sopenharmony_ci 291362306a36Sopenharmony_cidirect_grow: 291462306a36Sopenharmony_ci if (unlikely(!ac->avail)) { 291562306a36Sopenharmony_ci /* Check if we can use obj in pfmemalloc slab */ 291662306a36Sopenharmony_ci if (sk_memalloc_socks()) { 291762306a36Sopenharmony_ci void *obj = cache_alloc_pfmemalloc(cachep, n, flags); 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_ci if (obj) 292062306a36Sopenharmony_ci return obj; 292162306a36Sopenharmony_ci } 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_ci slab = cache_grow_begin(cachep, gfp_exact_node(flags), node); 292462306a36Sopenharmony_ci 292562306a36Sopenharmony_ci /* 292662306a36Sopenharmony_ci * cache_grow_begin() can reenable interrupts, 292762306a36Sopenharmony_ci * then ac could change. 
292862306a36Sopenharmony_ci */ 292962306a36Sopenharmony_ci ac = cpu_cache_get(cachep); 293062306a36Sopenharmony_ci if (!ac->avail && slab) 293162306a36Sopenharmony_ci alloc_block(cachep, ac, slab, batchcount); 293262306a36Sopenharmony_ci cache_grow_end(cachep, slab); 293362306a36Sopenharmony_ci 293462306a36Sopenharmony_ci if (!ac->avail) 293562306a36Sopenharmony_ci return NULL; 293662306a36Sopenharmony_ci } 293762306a36Sopenharmony_ci ac->touched = 1; 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci return ac->entry[--ac->avail]; 294062306a36Sopenharmony_ci} 294162306a36Sopenharmony_ci 294262306a36Sopenharmony_ci#if DEBUG 294362306a36Sopenharmony_cistatic void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, 294462306a36Sopenharmony_ci gfp_t flags, void *objp, unsigned long caller) 294562306a36Sopenharmony_ci{ 294662306a36Sopenharmony_ci WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); 294762306a36Sopenharmony_ci if (!objp || is_kfence_address(objp)) 294862306a36Sopenharmony_ci return objp; 294962306a36Sopenharmony_ci if (cachep->flags & SLAB_POISON) { 295062306a36Sopenharmony_ci check_poison_obj(cachep, objp); 295162306a36Sopenharmony_ci slab_kernel_map(cachep, objp, 1); 295262306a36Sopenharmony_ci poison_obj(cachep, objp, POISON_INUSE); 295362306a36Sopenharmony_ci } 295462306a36Sopenharmony_ci if (cachep->flags & SLAB_STORE_USER) 295562306a36Sopenharmony_ci *dbg_userword(cachep, objp) = (void *)caller; 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci if (cachep->flags & SLAB_RED_ZONE) { 295862306a36Sopenharmony_ci if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || 295962306a36Sopenharmony_ci *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 296062306a36Sopenharmony_ci slab_error(cachep, "double free, or memory outside object was overwritten"); 296162306a36Sopenharmony_ci pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n", 296262306a36Sopenharmony_ci objp, *dbg_redzone1(cachep, objp), 296362306a36Sopenharmony_ci *dbg_redzone2(cachep, objp)); 
296462306a36Sopenharmony_ci } 296562306a36Sopenharmony_ci *dbg_redzone1(cachep, objp) = RED_ACTIVE; 296662306a36Sopenharmony_ci *dbg_redzone2(cachep, objp) = RED_ACTIVE; 296762306a36Sopenharmony_ci } 296862306a36Sopenharmony_ci 296962306a36Sopenharmony_ci objp += obj_offset(cachep); 297062306a36Sopenharmony_ci if (cachep->ctor && cachep->flags & SLAB_POISON) 297162306a36Sopenharmony_ci cachep->ctor(objp); 297262306a36Sopenharmony_ci if ((unsigned long)objp & (arch_slab_minalign() - 1)) { 297362306a36Sopenharmony_ci pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp, 297462306a36Sopenharmony_ci arch_slab_minalign()); 297562306a36Sopenharmony_ci } 297662306a36Sopenharmony_ci return objp; 297762306a36Sopenharmony_ci} 297862306a36Sopenharmony_ci#else 297962306a36Sopenharmony_ci#define cache_alloc_debugcheck_after(a, b, objp, d) (objp) 298062306a36Sopenharmony_ci#endif 298162306a36Sopenharmony_ci 298262306a36Sopenharmony_cistatic inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) 298362306a36Sopenharmony_ci{ 298462306a36Sopenharmony_ci void *objp; 298562306a36Sopenharmony_ci struct array_cache *ac; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci check_irq_off(); 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_ci ac = cpu_cache_get(cachep); 299062306a36Sopenharmony_ci if (likely(ac->avail)) { 299162306a36Sopenharmony_ci ac->touched = 1; 299262306a36Sopenharmony_ci objp = ac->entry[--ac->avail]; 299362306a36Sopenharmony_ci 299462306a36Sopenharmony_ci STATS_INC_ALLOCHIT(cachep); 299562306a36Sopenharmony_ci goto out; 299662306a36Sopenharmony_ci } 299762306a36Sopenharmony_ci 299862306a36Sopenharmony_ci STATS_INC_ALLOCMISS(cachep); 299962306a36Sopenharmony_ci objp = cache_alloc_refill(cachep, flags); 300062306a36Sopenharmony_ci /* 300162306a36Sopenharmony_ci * the 'ac' may be updated by cache_alloc_refill(), 300262306a36Sopenharmony_ci * and kmemleak_erase() requires its correct value. 
300362306a36Sopenharmony_ci */ 300462306a36Sopenharmony_ci ac = cpu_cache_get(cachep); 300562306a36Sopenharmony_ci 300662306a36Sopenharmony_ciout: 300762306a36Sopenharmony_ci /* 300862306a36Sopenharmony_ci * To avoid a false negative, if an object that is in one of the 300962306a36Sopenharmony_ci * per-CPU caches is leaked, we need to make sure kmemleak doesn't 301062306a36Sopenharmony_ci * treat the array pointers as a reference to the object. 301162306a36Sopenharmony_ci */ 301262306a36Sopenharmony_ci if (objp) 301362306a36Sopenharmony_ci kmemleak_erase(&ac->entry[ac->avail]); 301462306a36Sopenharmony_ci return objp; 301562306a36Sopenharmony_ci} 301662306a36Sopenharmony_ci 301762306a36Sopenharmony_ci#ifdef CONFIG_NUMA 301862306a36Sopenharmony_cistatic void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_ci/* 302162306a36Sopenharmony_ci * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set. 302262306a36Sopenharmony_ci * 302362306a36Sopenharmony_ci * If we are in_interrupt, then process context, including cpusets and 302462306a36Sopenharmony_ci * mempolicy, may not apply and should not be used for allocation policy. 
302562306a36Sopenharmony_ci */ 302662306a36Sopenharmony_cistatic void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) 302762306a36Sopenharmony_ci{ 302862306a36Sopenharmony_ci int nid_alloc, nid_here; 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci if (in_interrupt() || (flags & __GFP_THISNODE)) 303162306a36Sopenharmony_ci return NULL; 303262306a36Sopenharmony_ci nid_alloc = nid_here = numa_mem_id(); 303362306a36Sopenharmony_ci if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 303462306a36Sopenharmony_ci nid_alloc = cpuset_slab_spread_node(); 303562306a36Sopenharmony_ci else if (current->mempolicy) 303662306a36Sopenharmony_ci nid_alloc = mempolicy_slab_node(); 303762306a36Sopenharmony_ci if (nid_alloc != nid_here) 303862306a36Sopenharmony_ci return ____cache_alloc_node(cachep, flags, nid_alloc); 303962306a36Sopenharmony_ci return NULL; 304062306a36Sopenharmony_ci} 304162306a36Sopenharmony_ci 304262306a36Sopenharmony_ci/* 304362306a36Sopenharmony_ci * Fallback function if there was no memory available and no objects on a 304462306a36Sopenharmony_ci * certain node and fall back is permitted. First we scan all the 304562306a36Sopenharmony_ci * available node for available objects. If that fails then we 304662306a36Sopenharmony_ci * perform an allocation without specifying a node. This allows the page 304762306a36Sopenharmony_ci * allocator to do its reclaim / fallback magic. We then insert the 304862306a36Sopenharmony_ci * slab into the proper nodelist and then allocate from it. 
304962306a36Sopenharmony_ci */ 305062306a36Sopenharmony_cistatic void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 305162306a36Sopenharmony_ci{ 305262306a36Sopenharmony_ci struct zonelist *zonelist; 305362306a36Sopenharmony_ci struct zoneref *z; 305462306a36Sopenharmony_ci struct zone *zone; 305562306a36Sopenharmony_ci enum zone_type highest_zoneidx = gfp_zone(flags); 305662306a36Sopenharmony_ci void *obj = NULL; 305762306a36Sopenharmony_ci struct slab *slab; 305862306a36Sopenharmony_ci int nid; 305962306a36Sopenharmony_ci unsigned int cpuset_mems_cookie; 306062306a36Sopenharmony_ci 306162306a36Sopenharmony_ci if (flags & __GFP_THISNODE) 306262306a36Sopenharmony_ci return NULL; 306362306a36Sopenharmony_ci 306462306a36Sopenharmony_ciretry_cpuset: 306562306a36Sopenharmony_ci cpuset_mems_cookie = read_mems_allowed_begin(); 306662306a36Sopenharmony_ci zonelist = node_zonelist(mempolicy_slab_node(), flags); 306762306a36Sopenharmony_ci 306862306a36Sopenharmony_ciretry: 306962306a36Sopenharmony_ci /* 307062306a36Sopenharmony_ci * Look through allowed nodes for objects available 307162306a36Sopenharmony_ci * from existing per node queues. 
307262306a36Sopenharmony_ci */ 307362306a36Sopenharmony_ci for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { 307462306a36Sopenharmony_ci nid = zone_to_nid(zone); 307562306a36Sopenharmony_ci 307662306a36Sopenharmony_ci if (cpuset_zone_allowed(zone, flags) && 307762306a36Sopenharmony_ci get_node(cache, nid) && 307862306a36Sopenharmony_ci get_node(cache, nid)->free_objects) { 307962306a36Sopenharmony_ci obj = ____cache_alloc_node(cache, 308062306a36Sopenharmony_ci gfp_exact_node(flags), nid); 308162306a36Sopenharmony_ci if (obj) 308262306a36Sopenharmony_ci break; 308362306a36Sopenharmony_ci } 308462306a36Sopenharmony_ci } 308562306a36Sopenharmony_ci 308662306a36Sopenharmony_ci if (!obj) { 308762306a36Sopenharmony_ci /* 308862306a36Sopenharmony_ci * This allocation will be performed within the constraints 308962306a36Sopenharmony_ci * of the current cpuset / memory policy requirements. 309062306a36Sopenharmony_ci * We may trigger various forms of reclaim on the allowed 309162306a36Sopenharmony_ci * set and go into memory reserves if necessary. 309262306a36Sopenharmony_ci */ 309362306a36Sopenharmony_ci slab = cache_grow_begin(cache, flags, numa_mem_id()); 309462306a36Sopenharmony_ci cache_grow_end(cache, slab); 309562306a36Sopenharmony_ci if (slab) { 309662306a36Sopenharmony_ci nid = slab_nid(slab); 309762306a36Sopenharmony_ci obj = ____cache_alloc_node(cache, 309862306a36Sopenharmony_ci gfp_exact_node(flags), nid); 309962306a36Sopenharmony_ci 310062306a36Sopenharmony_ci /* 310162306a36Sopenharmony_ci * Another processor may allocate the objects in 310262306a36Sopenharmony_ci * the slab since we are not holding any locks. 
310362306a36Sopenharmony_ci */ 310462306a36Sopenharmony_ci if (!obj) 310562306a36Sopenharmony_ci goto retry; 310662306a36Sopenharmony_ci } 310762306a36Sopenharmony_ci } 310862306a36Sopenharmony_ci 310962306a36Sopenharmony_ci if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie))) 311062306a36Sopenharmony_ci goto retry_cpuset; 311162306a36Sopenharmony_ci return obj; 311262306a36Sopenharmony_ci} 311362306a36Sopenharmony_ci 311462306a36Sopenharmony_ci/* 311562306a36Sopenharmony_ci * An interface to enable slab creation on nodeid 311662306a36Sopenharmony_ci */ 311762306a36Sopenharmony_cistatic void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, 311862306a36Sopenharmony_ci int nodeid) 311962306a36Sopenharmony_ci{ 312062306a36Sopenharmony_ci struct slab *slab; 312162306a36Sopenharmony_ci struct kmem_cache_node *n; 312262306a36Sopenharmony_ci void *obj = NULL; 312362306a36Sopenharmony_ci void *list = NULL; 312462306a36Sopenharmony_ci 312562306a36Sopenharmony_ci VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES); 312662306a36Sopenharmony_ci n = get_node(cachep, nodeid); 312762306a36Sopenharmony_ci BUG_ON(!n); 312862306a36Sopenharmony_ci 312962306a36Sopenharmony_ci check_irq_off(); 313062306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 313162306a36Sopenharmony_ci slab = get_first_slab(n, false); 313262306a36Sopenharmony_ci if (!slab) 313362306a36Sopenharmony_ci goto must_grow; 313462306a36Sopenharmony_ci 313562306a36Sopenharmony_ci check_spinlock_acquired_node(cachep, nodeid); 313662306a36Sopenharmony_ci 313762306a36Sopenharmony_ci STATS_INC_NODEALLOCS(cachep); 313862306a36Sopenharmony_ci STATS_INC_ACTIVE(cachep); 313962306a36Sopenharmony_ci STATS_SET_HIGH(cachep); 314062306a36Sopenharmony_ci 314162306a36Sopenharmony_ci BUG_ON(slab->active == cachep->num); 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci obj = slab_get_obj(cachep, slab); 314462306a36Sopenharmony_ci n->free_objects--; 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_ci 
fixup_slab_list(cachep, n, slab, &list); 314762306a36Sopenharmony_ci 314862306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 314962306a36Sopenharmony_ci fixup_objfreelist_debug(cachep, &list); 315062306a36Sopenharmony_ci return obj; 315162306a36Sopenharmony_ci 315262306a36Sopenharmony_cimust_grow: 315362306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 315462306a36Sopenharmony_ci slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); 315562306a36Sopenharmony_ci if (slab) { 315662306a36Sopenharmony_ci /* This slab isn't counted yet so don't update free_objects */ 315762306a36Sopenharmony_ci obj = slab_get_obj(cachep, slab); 315862306a36Sopenharmony_ci } 315962306a36Sopenharmony_ci cache_grow_end(cachep, slab); 316062306a36Sopenharmony_ci 316162306a36Sopenharmony_ci return obj ? obj : fallback_alloc(cachep, flags); 316262306a36Sopenharmony_ci} 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_cistatic __always_inline void * 316562306a36Sopenharmony_ci__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid) 316662306a36Sopenharmony_ci{ 316762306a36Sopenharmony_ci void *objp = NULL; 316862306a36Sopenharmony_ci int slab_node = numa_mem_id(); 316962306a36Sopenharmony_ci 317062306a36Sopenharmony_ci if (nodeid == NUMA_NO_NODE) { 317162306a36Sopenharmony_ci if (current->mempolicy || cpuset_do_slab_mem_spread()) { 317262306a36Sopenharmony_ci objp = alternate_node_alloc(cachep, flags); 317362306a36Sopenharmony_ci if (objp) 317462306a36Sopenharmony_ci goto out; 317562306a36Sopenharmony_ci } 317662306a36Sopenharmony_ci /* 317762306a36Sopenharmony_ci * Use the locally cached objects if possible. 317862306a36Sopenharmony_ci * However ____cache_alloc does not allow fallback 317962306a36Sopenharmony_ci * to other nodes. It may fail while we still have 318062306a36Sopenharmony_ci * objects on other nodes available. 
318162306a36Sopenharmony_ci */ 318262306a36Sopenharmony_ci objp = ____cache_alloc(cachep, flags); 318362306a36Sopenharmony_ci nodeid = slab_node; 318462306a36Sopenharmony_ci } else if (nodeid == slab_node) { 318562306a36Sopenharmony_ci objp = ____cache_alloc(cachep, flags); 318662306a36Sopenharmony_ci } else if (!get_node(cachep, nodeid)) { 318762306a36Sopenharmony_ci /* Node not bootstrapped yet */ 318862306a36Sopenharmony_ci objp = fallback_alloc(cachep, flags); 318962306a36Sopenharmony_ci goto out; 319062306a36Sopenharmony_ci } 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_ci /* 319362306a36Sopenharmony_ci * We may just have run out of memory on the local node. 319462306a36Sopenharmony_ci * ____cache_alloc_node() knows how to locate memory on other nodes 319562306a36Sopenharmony_ci */ 319662306a36Sopenharmony_ci if (!objp) 319762306a36Sopenharmony_ci objp = ____cache_alloc_node(cachep, flags, nodeid); 319862306a36Sopenharmony_ciout: 319962306a36Sopenharmony_ci return objp; 320062306a36Sopenharmony_ci} 320162306a36Sopenharmony_ci#else 320262306a36Sopenharmony_ci 320362306a36Sopenharmony_cistatic __always_inline void * 320462306a36Sopenharmony_ci__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unused) 320562306a36Sopenharmony_ci{ 320662306a36Sopenharmony_ci return ____cache_alloc(cachep, flags); 320762306a36Sopenharmony_ci} 320862306a36Sopenharmony_ci 320962306a36Sopenharmony_ci#endif /* CONFIG_NUMA */ 321062306a36Sopenharmony_ci 321162306a36Sopenharmony_cistatic __always_inline void * 321262306a36Sopenharmony_cislab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, 321362306a36Sopenharmony_ci int nodeid, size_t orig_size, unsigned long caller) 321462306a36Sopenharmony_ci{ 321562306a36Sopenharmony_ci unsigned long save_flags; 321662306a36Sopenharmony_ci void *objp; 321762306a36Sopenharmony_ci struct obj_cgroup *objcg = NULL; 321862306a36Sopenharmony_ci bool init = false; 321962306a36Sopenharmony_ci 
322062306a36Sopenharmony_ci flags &= gfp_allowed_mask; 322162306a36Sopenharmony_ci cachep = slab_pre_alloc_hook(cachep, lru, &objcg, 1, flags); 322262306a36Sopenharmony_ci if (unlikely(!cachep)) 322362306a36Sopenharmony_ci return NULL; 322462306a36Sopenharmony_ci 322562306a36Sopenharmony_ci objp = kfence_alloc(cachep, orig_size, flags); 322662306a36Sopenharmony_ci if (unlikely(objp)) 322762306a36Sopenharmony_ci goto out; 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci local_irq_save(save_flags); 323062306a36Sopenharmony_ci objp = __do_cache_alloc(cachep, flags, nodeid); 323162306a36Sopenharmony_ci local_irq_restore(save_flags); 323262306a36Sopenharmony_ci objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); 323362306a36Sopenharmony_ci prefetchw(objp); 323462306a36Sopenharmony_ci init = slab_want_init_on_alloc(flags, cachep); 323562306a36Sopenharmony_ci 323662306a36Sopenharmony_ciout: 323762306a36Sopenharmony_ci slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init, 323862306a36Sopenharmony_ci cachep->object_size); 323962306a36Sopenharmony_ci return objp; 324062306a36Sopenharmony_ci} 324162306a36Sopenharmony_ci 324262306a36Sopenharmony_cistatic __always_inline void * 324362306a36Sopenharmony_cislab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, 324462306a36Sopenharmony_ci size_t orig_size, unsigned long caller) 324562306a36Sopenharmony_ci{ 324662306a36Sopenharmony_ci return slab_alloc_node(cachep, lru, flags, NUMA_NO_NODE, orig_size, 324762306a36Sopenharmony_ci caller); 324862306a36Sopenharmony_ci} 324962306a36Sopenharmony_ci 325062306a36Sopenharmony_ci/* 325162306a36Sopenharmony_ci * Caller needs to acquire correct kmem_cache_node's list_lock 325262306a36Sopenharmony_ci * @list: List of detached free slabs should be freed by caller 325362306a36Sopenharmony_ci */ 325462306a36Sopenharmony_cistatic void free_block(struct kmem_cache *cachep, void **objpp, 325562306a36Sopenharmony_ci int nr_objects, int node, struct 
list_head *list) 325662306a36Sopenharmony_ci{ 325762306a36Sopenharmony_ci int i; 325862306a36Sopenharmony_ci struct kmem_cache_node *n = get_node(cachep, node); 325962306a36Sopenharmony_ci struct slab *slab; 326062306a36Sopenharmony_ci 326162306a36Sopenharmony_ci n->free_objects += nr_objects; 326262306a36Sopenharmony_ci 326362306a36Sopenharmony_ci for (i = 0; i < nr_objects; i++) { 326462306a36Sopenharmony_ci void *objp; 326562306a36Sopenharmony_ci struct slab *slab; 326662306a36Sopenharmony_ci 326762306a36Sopenharmony_ci objp = objpp[i]; 326862306a36Sopenharmony_ci 326962306a36Sopenharmony_ci slab = virt_to_slab(objp); 327062306a36Sopenharmony_ci list_del(&slab->slab_list); 327162306a36Sopenharmony_ci check_spinlock_acquired_node(cachep, node); 327262306a36Sopenharmony_ci slab_put_obj(cachep, slab, objp); 327362306a36Sopenharmony_ci STATS_DEC_ACTIVE(cachep); 327462306a36Sopenharmony_ci 327562306a36Sopenharmony_ci /* fixup slab chains */ 327662306a36Sopenharmony_ci if (slab->active == 0) { 327762306a36Sopenharmony_ci list_add(&slab->slab_list, &n->slabs_free); 327862306a36Sopenharmony_ci n->free_slabs++; 327962306a36Sopenharmony_ci } else { 328062306a36Sopenharmony_ci /* Unconditionally move a slab to the end of the 328162306a36Sopenharmony_ci * partial list on free - maximum time for the 328262306a36Sopenharmony_ci * other objects to be freed, too. 
328362306a36Sopenharmony_ci */ 328462306a36Sopenharmony_ci list_add_tail(&slab->slab_list, &n->slabs_partial); 328562306a36Sopenharmony_ci } 328662306a36Sopenharmony_ci } 328762306a36Sopenharmony_ci 328862306a36Sopenharmony_ci while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) { 328962306a36Sopenharmony_ci n->free_objects -= cachep->num; 329062306a36Sopenharmony_ci 329162306a36Sopenharmony_ci slab = list_last_entry(&n->slabs_free, struct slab, slab_list); 329262306a36Sopenharmony_ci list_move(&slab->slab_list, list); 329362306a36Sopenharmony_ci n->free_slabs--; 329462306a36Sopenharmony_ci n->total_slabs--; 329562306a36Sopenharmony_ci } 329662306a36Sopenharmony_ci} 329762306a36Sopenharmony_ci 329862306a36Sopenharmony_cistatic void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) 329962306a36Sopenharmony_ci{ 330062306a36Sopenharmony_ci int batchcount; 330162306a36Sopenharmony_ci struct kmem_cache_node *n; 330262306a36Sopenharmony_ci int node = numa_mem_id(); 330362306a36Sopenharmony_ci LIST_HEAD(list); 330462306a36Sopenharmony_ci 330562306a36Sopenharmony_ci batchcount = ac->batchcount; 330662306a36Sopenharmony_ci 330762306a36Sopenharmony_ci check_irq_off(); 330862306a36Sopenharmony_ci n = get_node(cachep, node); 330962306a36Sopenharmony_ci raw_spin_lock(&n->list_lock); 331062306a36Sopenharmony_ci if (n->shared) { 331162306a36Sopenharmony_ci struct array_cache *shared_array = n->shared; 331262306a36Sopenharmony_ci int max = shared_array->limit - shared_array->avail; 331362306a36Sopenharmony_ci if (max) { 331462306a36Sopenharmony_ci if (batchcount > max) 331562306a36Sopenharmony_ci batchcount = max; 331662306a36Sopenharmony_ci memcpy(&(shared_array->entry[shared_array->avail]), 331762306a36Sopenharmony_ci ac->entry, sizeof(void *) * batchcount); 331862306a36Sopenharmony_ci shared_array->avail += batchcount; 331962306a36Sopenharmony_ci goto free_done; 332062306a36Sopenharmony_ci } 332162306a36Sopenharmony_ci } 
332262306a36Sopenharmony_ci 332362306a36Sopenharmony_ci free_block(cachep, ac->entry, batchcount, node, &list); 332462306a36Sopenharmony_cifree_done: 332562306a36Sopenharmony_ci#if STATS 332662306a36Sopenharmony_ci { 332762306a36Sopenharmony_ci int i = 0; 332862306a36Sopenharmony_ci struct slab *slab; 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci list_for_each_entry(slab, &n->slabs_free, slab_list) { 333162306a36Sopenharmony_ci BUG_ON(slab->active); 333262306a36Sopenharmony_ci 333362306a36Sopenharmony_ci i++; 333462306a36Sopenharmony_ci } 333562306a36Sopenharmony_ci STATS_SET_FREEABLE(cachep, i); 333662306a36Sopenharmony_ci } 333762306a36Sopenharmony_ci#endif 333862306a36Sopenharmony_ci raw_spin_unlock(&n->list_lock); 333962306a36Sopenharmony_ci ac->avail -= batchcount; 334062306a36Sopenharmony_ci memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 334162306a36Sopenharmony_ci slabs_destroy(cachep, &list); 334262306a36Sopenharmony_ci} 334362306a36Sopenharmony_ci 334462306a36Sopenharmony_ci/* 334562306a36Sopenharmony_ci * Release an obj back to its cache. If the obj has a constructed state, it must 334662306a36Sopenharmony_ci * be in this state _before_ it is released. Called with disabled ints. 
334762306a36Sopenharmony_ci */ 334862306a36Sopenharmony_cistatic __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, 334962306a36Sopenharmony_ci unsigned long caller) 335062306a36Sopenharmony_ci{ 335162306a36Sopenharmony_ci bool init; 335262306a36Sopenharmony_ci 335362306a36Sopenharmony_ci memcg_slab_free_hook(cachep, virt_to_slab(objp), &objp, 1); 335462306a36Sopenharmony_ci 335562306a36Sopenharmony_ci if (is_kfence_address(objp)) { 335662306a36Sopenharmony_ci kmemleak_free_recursive(objp, cachep->flags); 335762306a36Sopenharmony_ci __kfence_free(objp); 335862306a36Sopenharmony_ci return; 335962306a36Sopenharmony_ci } 336062306a36Sopenharmony_ci 336162306a36Sopenharmony_ci /* 336262306a36Sopenharmony_ci * As memory initialization might be integrated into KASAN, 336362306a36Sopenharmony_ci * kasan_slab_free and initialization memset must be 336462306a36Sopenharmony_ci * kept together to avoid discrepancies in behavior. 336562306a36Sopenharmony_ci */ 336662306a36Sopenharmony_ci init = slab_want_init_on_free(cachep); 336762306a36Sopenharmony_ci if (init && !kasan_has_integrated_init()) 336862306a36Sopenharmony_ci memset(objp, 0, cachep->object_size); 336962306a36Sopenharmony_ci /* KASAN might put objp into memory quarantine, delaying its reuse. */ 337062306a36Sopenharmony_ci if (kasan_slab_free(cachep, objp, init)) 337162306a36Sopenharmony_ci return; 337262306a36Sopenharmony_ci 337362306a36Sopenharmony_ci /* Use KCSAN to help debug racy use-after-free. 
*/ 337462306a36Sopenharmony_ci if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU)) 337562306a36Sopenharmony_ci __kcsan_check_access(objp, cachep->object_size, 337662306a36Sopenharmony_ci KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); 337762306a36Sopenharmony_ci 337862306a36Sopenharmony_ci ___cache_free(cachep, objp, caller); 337962306a36Sopenharmony_ci} 338062306a36Sopenharmony_ci 338162306a36Sopenharmony_civoid ___cache_free(struct kmem_cache *cachep, void *objp, 338262306a36Sopenharmony_ci unsigned long caller) 338362306a36Sopenharmony_ci{ 338462306a36Sopenharmony_ci struct array_cache *ac = cpu_cache_get(cachep); 338562306a36Sopenharmony_ci 338662306a36Sopenharmony_ci check_irq_off(); 338762306a36Sopenharmony_ci kmemleak_free_recursive(objp, cachep->flags); 338862306a36Sopenharmony_ci objp = cache_free_debugcheck(cachep, objp, caller); 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_ci /* 339162306a36Sopenharmony_ci * Skip calling cache_free_alien() when the platform is not numa. 339262306a36Sopenharmony_ci * This will avoid cache misses that happen while accessing slabp (which 339362306a36Sopenharmony_ci * is per page memory reference) to get nodeid. Instead use a global 339462306a36Sopenharmony_ci * variable to skip the call, which is mostly likely to be present in 339562306a36Sopenharmony_ci * the cache. 
339662306a36Sopenharmony_ci */ 339762306a36Sopenharmony_ci if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) 339862306a36Sopenharmony_ci return; 339962306a36Sopenharmony_ci 340062306a36Sopenharmony_ci if (ac->avail < ac->limit) { 340162306a36Sopenharmony_ci STATS_INC_FREEHIT(cachep); 340262306a36Sopenharmony_ci } else { 340362306a36Sopenharmony_ci STATS_INC_FREEMISS(cachep); 340462306a36Sopenharmony_ci cache_flusharray(cachep, ac); 340562306a36Sopenharmony_ci } 340662306a36Sopenharmony_ci 340762306a36Sopenharmony_ci if (sk_memalloc_socks()) { 340862306a36Sopenharmony_ci struct slab *slab = virt_to_slab(objp); 340962306a36Sopenharmony_ci 341062306a36Sopenharmony_ci if (unlikely(slab_test_pfmemalloc(slab))) { 341162306a36Sopenharmony_ci cache_free_pfmemalloc(cachep, slab, objp); 341262306a36Sopenharmony_ci return; 341362306a36Sopenharmony_ci } 341462306a36Sopenharmony_ci } 341562306a36Sopenharmony_ci 341662306a36Sopenharmony_ci __free_one(ac, objp); 341762306a36Sopenharmony_ci} 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_cistatic __always_inline 342062306a36Sopenharmony_civoid *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, 342162306a36Sopenharmony_ci gfp_t flags) 342262306a36Sopenharmony_ci{ 342362306a36Sopenharmony_ci void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_); 342462306a36Sopenharmony_ci 342562306a36Sopenharmony_ci trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, NUMA_NO_NODE); 342662306a36Sopenharmony_ci 342762306a36Sopenharmony_ci return ret; 342862306a36Sopenharmony_ci} 342962306a36Sopenharmony_ci 343062306a36Sopenharmony_civoid *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 343162306a36Sopenharmony_ci{ 343262306a36Sopenharmony_ci return __kmem_cache_alloc_lru(cachep, NULL, flags); 343362306a36Sopenharmony_ci} 343462306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_alloc); 343562306a36Sopenharmony_ci 343662306a36Sopenharmony_civoid *kmem_cache_alloc_lru(struct kmem_cache 
*cachep, struct list_lru *lru, 343762306a36Sopenharmony_ci gfp_t flags) 343862306a36Sopenharmony_ci{ 343962306a36Sopenharmony_ci return __kmem_cache_alloc_lru(cachep, lru, flags); 344062306a36Sopenharmony_ci} 344162306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_alloc_lru); 344262306a36Sopenharmony_ci 344362306a36Sopenharmony_cistatic __always_inline void 344462306a36Sopenharmony_cicache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags, 344562306a36Sopenharmony_ci size_t size, void **p, unsigned long caller) 344662306a36Sopenharmony_ci{ 344762306a36Sopenharmony_ci size_t i; 344862306a36Sopenharmony_ci 344962306a36Sopenharmony_ci for (i = 0; i < size; i++) 345062306a36Sopenharmony_ci p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller); 345162306a36Sopenharmony_ci} 345262306a36Sopenharmony_ci 345362306a36Sopenharmony_ciint kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, 345462306a36Sopenharmony_ci void **p) 345562306a36Sopenharmony_ci{ 345662306a36Sopenharmony_ci struct obj_cgroup *objcg = NULL; 345762306a36Sopenharmony_ci unsigned long irqflags; 345862306a36Sopenharmony_ci size_t i; 345962306a36Sopenharmony_ci 346062306a36Sopenharmony_ci s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); 346162306a36Sopenharmony_ci if (!s) 346262306a36Sopenharmony_ci return 0; 346362306a36Sopenharmony_ci 346462306a36Sopenharmony_ci local_irq_save(irqflags); 346562306a36Sopenharmony_ci for (i = 0; i < size; i++) { 346662306a36Sopenharmony_ci void *objp = kfence_alloc(s, s->object_size, flags) ?: 346762306a36Sopenharmony_ci __do_cache_alloc(s, flags, NUMA_NO_NODE); 346862306a36Sopenharmony_ci 346962306a36Sopenharmony_ci if (unlikely(!objp)) 347062306a36Sopenharmony_ci goto error; 347162306a36Sopenharmony_ci p[i] = objp; 347262306a36Sopenharmony_ci } 347362306a36Sopenharmony_ci local_irq_restore(irqflags); 347462306a36Sopenharmony_ci 347562306a36Sopenharmony_ci cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); 
347662306a36Sopenharmony_ci 347762306a36Sopenharmony_ci /* 347862306a36Sopenharmony_ci * memcg and kmem_cache debug support and memory initialization. 347962306a36Sopenharmony_ci * Done outside of the IRQ disabled section. 348062306a36Sopenharmony_ci */ 348162306a36Sopenharmony_ci slab_post_alloc_hook(s, objcg, flags, size, p, 348262306a36Sopenharmony_ci slab_want_init_on_alloc(flags, s), s->object_size); 348362306a36Sopenharmony_ci /* FIXME: Trace call missing. Christoph would like a bulk variant */ 348462306a36Sopenharmony_ci return size; 348562306a36Sopenharmony_cierror: 348662306a36Sopenharmony_ci local_irq_restore(irqflags); 348762306a36Sopenharmony_ci cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); 348862306a36Sopenharmony_ci slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); 348962306a36Sopenharmony_ci kmem_cache_free_bulk(s, i, p); 349062306a36Sopenharmony_ci return 0; 349162306a36Sopenharmony_ci} 349262306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_alloc_bulk); 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci/** 349562306a36Sopenharmony_ci * kmem_cache_alloc_node - Allocate an object on the specified node 349662306a36Sopenharmony_ci * @cachep: The cache to allocate from. 349762306a36Sopenharmony_ci * @flags: See kmalloc(). 349862306a36Sopenharmony_ci * @nodeid: node number of the target node. 349962306a36Sopenharmony_ci * 350062306a36Sopenharmony_ci * Identical to kmem_cache_alloc but it will allocate memory on the given 350162306a36Sopenharmony_ci * node, which can improve the performance for cpu bound structures. 350262306a36Sopenharmony_ci * 350362306a36Sopenharmony_ci * Fallback to other node is possible if __GFP_THISNODE is not set. 
350462306a36Sopenharmony_ci * 350562306a36Sopenharmony_ci * Return: pointer to the new object or %NULL in case of error 350662306a36Sopenharmony_ci */ 350762306a36Sopenharmony_civoid *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 350862306a36Sopenharmony_ci{ 350962306a36Sopenharmony_ci void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_); 351062306a36Sopenharmony_ci 351162306a36Sopenharmony_ci trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, nodeid); 351262306a36Sopenharmony_ci 351362306a36Sopenharmony_ci return ret; 351462306a36Sopenharmony_ci} 351562306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_alloc_node); 351662306a36Sopenharmony_ci 351762306a36Sopenharmony_civoid *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, 351862306a36Sopenharmony_ci int nodeid, size_t orig_size, 351962306a36Sopenharmony_ci unsigned long caller) 352062306a36Sopenharmony_ci{ 352162306a36Sopenharmony_ci return slab_alloc_node(cachep, NULL, flags, nodeid, 352262306a36Sopenharmony_ci orig_size, caller); 352362306a36Sopenharmony_ci} 352462306a36Sopenharmony_ci 352562306a36Sopenharmony_ci#ifdef CONFIG_PRINTK 352662306a36Sopenharmony_civoid __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) 352762306a36Sopenharmony_ci{ 352862306a36Sopenharmony_ci struct kmem_cache *cachep; 352962306a36Sopenharmony_ci unsigned int objnr; 353062306a36Sopenharmony_ci void *objp; 353162306a36Sopenharmony_ci 353262306a36Sopenharmony_ci kpp->kp_ptr = object; 353362306a36Sopenharmony_ci kpp->kp_slab = slab; 353462306a36Sopenharmony_ci cachep = slab->slab_cache; 353562306a36Sopenharmony_ci kpp->kp_slab_cache = cachep; 353662306a36Sopenharmony_ci objp = object - obj_offset(cachep); 353762306a36Sopenharmony_ci kpp->kp_data_offset = obj_offset(cachep); 353862306a36Sopenharmony_ci slab = virt_to_slab(objp); 353962306a36Sopenharmony_ci objnr = obj_to_index(cachep, slab, objp); 354062306a36Sopenharmony_ci objp = 
index_to_obj(cachep, slab, objnr); 354162306a36Sopenharmony_ci kpp->kp_objp = objp; 354262306a36Sopenharmony_ci if (DEBUG && cachep->flags & SLAB_STORE_USER) 354362306a36Sopenharmony_ci kpp->kp_ret = *dbg_userword(cachep, objp); 354462306a36Sopenharmony_ci} 354562306a36Sopenharmony_ci#endif 354662306a36Sopenharmony_ci 354762306a36Sopenharmony_cistatic __always_inline 354862306a36Sopenharmony_civoid __do_kmem_cache_free(struct kmem_cache *cachep, void *objp, 354962306a36Sopenharmony_ci unsigned long caller) 355062306a36Sopenharmony_ci{ 355162306a36Sopenharmony_ci unsigned long flags; 355262306a36Sopenharmony_ci 355362306a36Sopenharmony_ci local_irq_save(flags); 355462306a36Sopenharmony_ci debug_check_no_locks_freed(objp, cachep->object_size); 355562306a36Sopenharmony_ci if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) 355662306a36Sopenharmony_ci debug_check_no_obj_freed(objp, cachep->object_size); 355762306a36Sopenharmony_ci __cache_free(cachep, objp, caller); 355862306a36Sopenharmony_ci local_irq_restore(flags); 355962306a36Sopenharmony_ci} 356062306a36Sopenharmony_ci 356162306a36Sopenharmony_civoid __kmem_cache_free(struct kmem_cache *cachep, void *objp, 356262306a36Sopenharmony_ci unsigned long caller) 356362306a36Sopenharmony_ci{ 356462306a36Sopenharmony_ci __do_kmem_cache_free(cachep, objp, caller); 356562306a36Sopenharmony_ci} 356662306a36Sopenharmony_ci 356762306a36Sopenharmony_ci/** 356862306a36Sopenharmony_ci * kmem_cache_free - Deallocate an object 356962306a36Sopenharmony_ci * @cachep: The cache the allocation was from. 357062306a36Sopenharmony_ci * @objp: The previously allocated object. 357162306a36Sopenharmony_ci * 357262306a36Sopenharmony_ci * Free an object which was previously allocated from this 357362306a36Sopenharmony_ci * cache. 
357462306a36Sopenharmony_ci */ 357562306a36Sopenharmony_civoid kmem_cache_free(struct kmem_cache *cachep, void *objp) 357662306a36Sopenharmony_ci{ 357762306a36Sopenharmony_ci cachep = cache_from_obj(cachep, objp); 357862306a36Sopenharmony_ci if (!cachep) 357962306a36Sopenharmony_ci return; 358062306a36Sopenharmony_ci 358162306a36Sopenharmony_ci trace_kmem_cache_free(_RET_IP_, objp, cachep); 358262306a36Sopenharmony_ci __do_kmem_cache_free(cachep, objp, _RET_IP_); 358362306a36Sopenharmony_ci} 358462306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_free); 358562306a36Sopenharmony_ci 358662306a36Sopenharmony_civoid kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) 358762306a36Sopenharmony_ci{ 358862306a36Sopenharmony_ci unsigned long flags; 358962306a36Sopenharmony_ci 359062306a36Sopenharmony_ci local_irq_save(flags); 359162306a36Sopenharmony_ci for (int i = 0; i < size; i++) { 359262306a36Sopenharmony_ci void *objp = p[i]; 359362306a36Sopenharmony_ci struct kmem_cache *s; 359462306a36Sopenharmony_ci 359562306a36Sopenharmony_ci if (!orig_s) { 359662306a36Sopenharmony_ci struct folio *folio = virt_to_folio(objp); 359762306a36Sopenharmony_ci 359862306a36Sopenharmony_ci /* called via kfree_bulk */ 359962306a36Sopenharmony_ci if (!folio_test_slab(folio)) { 360062306a36Sopenharmony_ci local_irq_restore(flags); 360162306a36Sopenharmony_ci free_large_kmalloc(folio, objp); 360262306a36Sopenharmony_ci local_irq_save(flags); 360362306a36Sopenharmony_ci continue; 360462306a36Sopenharmony_ci } 360562306a36Sopenharmony_ci s = folio_slab(folio)->slab_cache; 360662306a36Sopenharmony_ci } else { 360762306a36Sopenharmony_ci s = cache_from_obj(orig_s, objp); 360862306a36Sopenharmony_ci } 360962306a36Sopenharmony_ci 361062306a36Sopenharmony_ci if (!s) 361162306a36Sopenharmony_ci continue; 361262306a36Sopenharmony_ci 361362306a36Sopenharmony_ci debug_check_no_locks_freed(objp, s->object_size); 361462306a36Sopenharmony_ci if (!(s->flags & SLAB_DEBUG_OBJECTS)) 
361562306a36Sopenharmony_ci debug_check_no_obj_freed(objp, s->object_size); 361662306a36Sopenharmony_ci 361762306a36Sopenharmony_ci __cache_free(s, objp, _RET_IP_); 361862306a36Sopenharmony_ci } 361962306a36Sopenharmony_ci local_irq_restore(flags); 362062306a36Sopenharmony_ci 362162306a36Sopenharmony_ci /* FIXME: add tracing */ 362262306a36Sopenharmony_ci} 362362306a36Sopenharmony_ciEXPORT_SYMBOL(kmem_cache_free_bulk); 362462306a36Sopenharmony_ci 362562306a36Sopenharmony_ci/* 362662306a36Sopenharmony_ci * This initializes kmem_cache_node or resizes various caches for all nodes. 362762306a36Sopenharmony_ci */ 362862306a36Sopenharmony_cistatic int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp) 362962306a36Sopenharmony_ci{ 363062306a36Sopenharmony_ci int ret; 363162306a36Sopenharmony_ci int node; 363262306a36Sopenharmony_ci struct kmem_cache_node *n; 363362306a36Sopenharmony_ci 363462306a36Sopenharmony_ci for_each_online_node(node) { 363562306a36Sopenharmony_ci ret = setup_kmem_cache_node(cachep, node, gfp, true); 363662306a36Sopenharmony_ci if (ret) 363762306a36Sopenharmony_ci goto fail; 363862306a36Sopenharmony_ci 363962306a36Sopenharmony_ci } 364062306a36Sopenharmony_ci 364162306a36Sopenharmony_ci return 0; 364262306a36Sopenharmony_ci 364362306a36Sopenharmony_cifail: 364462306a36Sopenharmony_ci if (!cachep->list.next) { 364562306a36Sopenharmony_ci /* Cache is not active yet. 
Roll back what we did */ 364662306a36Sopenharmony_ci node--; 364762306a36Sopenharmony_ci while (node >= 0) { 364862306a36Sopenharmony_ci n = get_node(cachep, node); 364962306a36Sopenharmony_ci if (n) { 365062306a36Sopenharmony_ci kfree(n->shared); 365162306a36Sopenharmony_ci free_alien_cache(n->alien); 365262306a36Sopenharmony_ci kfree(n); 365362306a36Sopenharmony_ci cachep->node[node] = NULL; 365462306a36Sopenharmony_ci } 365562306a36Sopenharmony_ci node--; 365662306a36Sopenharmony_ci } 365762306a36Sopenharmony_ci } 365862306a36Sopenharmony_ci return -ENOMEM; 365962306a36Sopenharmony_ci} 366062306a36Sopenharmony_ci 366162306a36Sopenharmony_ci/* Always called with the slab_mutex held */ 366262306a36Sopenharmony_cistatic int do_tune_cpucache(struct kmem_cache *cachep, int limit, 366362306a36Sopenharmony_ci int batchcount, int shared, gfp_t gfp) 366462306a36Sopenharmony_ci{ 366562306a36Sopenharmony_ci struct array_cache __percpu *cpu_cache, *prev; 366662306a36Sopenharmony_ci int cpu; 366762306a36Sopenharmony_ci 366862306a36Sopenharmony_ci cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount); 366962306a36Sopenharmony_ci if (!cpu_cache) 367062306a36Sopenharmony_ci return -ENOMEM; 367162306a36Sopenharmony_ci 367262306a36Sopenharmony_ci prev = cachep->cpu_cache; 367362306a36Sopenharmony_ci cachep->cpu_cache = cpu_cache; 367462306a36Sopenharmony_ci /* 367562306a36Sopenharmony_ci * Without a previous cpu_cache there's no need to synchronize remote 367662306a36Sopenharmony_ci * cpus, so skip the IPIs. 
367762306a36Sopenharmony_ci */ 367862306a36Sopenharmony_ci if (prev) 367962306a36Sopenharmony_ci kick_all_cpus_sync(); 368062306a36Sopenharmony_ci 368162306a36Sopenharmony_ci check_irq_on(); 368262306a36Sopenharmony_ci cachep->batchcount = batchcount; 368362306a36Sopenharmony_ci cachep->limit = limit; 368462306a36Sopenharmony_ci cachep->shared = shared; 368562306a36Sopenharmony_ci 368662306a36Sopenharmony_ci if (!prev) 368762306a36Sopenharmony_ci goto setup_node; 368862306a36Sopenharmony_ci 368962306a36Sopenharmony_ci for_each_online_cpu(cpu) { 369062306a36Sopenharmony_ci LIST_HEAD(list); 369162306a36Sopenharmony_ci int node; 369262306a36Sopenharmony_ci struct kmem_cache_node *n; 369362306a36Sopenharmony_ci struct array_cache *ac = per_cpu_ptr(prev, cpu); 369462306a36Sopenharmony_ci 369562306a36Sopenharmony_ci node = cpu_to_mem(cpu); 369662306a36Sopenharmony_ci n = get_node(cachep, node); 369762306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 369862306a36Sopenharmony_ci free_block(cachep, ac->entry, ac->avail, node, &list); 369962306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 370062306a36Sopenharmony_ci slabs_destroy(cachep, &list); 370162306a36Sopenharmony_ci } 370262306a36Sopenharmony_ci free_percpu(prev); 370362306a36Sopenharmony_ci 370462306a36Sopenharmony_cisetup_node: 370562306a36Sopenharmony_ci return setup_kmem_cache_nodes(cachep, gfp); 370662306a36Sopenharmony_ci} 370762306a36Sopenharmony_ci 370862306a36Sopenharmony_ci/* Called with slab_mutex held always */ 370962306a36Sopenharmony_cistatic int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) 371062306a36Sopenharmony_ci{ 371162306a36Sopenharmony_ci int err; 371262306a36Sopenharmony_ci int limit = 0; 371362306a36Sopenharmony_ci int shared = 0; 371462306a36Sopenharmony_ci int batchcount = 0; 371562306a36Sopenharmony_ci 371662306a36Sopenharmony_ci err = cache_random_seq_create(cachep, cachep->num, gfp); 371762306a36Sopenharmony_ci if (err) 371862306a36Sopenharmony_ci goto end; 
371962306a36Sopenharmony_ci 372062306a36Sopenharmony_ci /* 372162306a36Sopenharmony_ci * The head array serves three purposes: 372262306a36Sopenharmony_ci * - create a LIFO ordering, i.e. return objects that are cache-warm 372362306a36Sopenharmony_ci * - reduce the number of spinlock operations. 372462306a36Sopenharmony_ci * - reduce the number of linked list operations on the slab and 372562306a36Sopenharmony_ci * bufctl chains: array operations are cheaper. 372662306a36Sopenharmony_ci * The numbers are guessed, we should auto-tune as described by 372762306a36Sopenharmony_ci * Bonwick. 372862306a36Sopenharmony_ci */ 372962306a36Sopenharmony_ci if (cachep->size > 131072) 373062306a36Sopenharmony_ci limit = 1; 373162306a36Sopenharmony_ci else if (cachep->size > PAGE_SIZE) 373262306a36Sopenharmony_ci limit = 8; 373362306a36Sopenharmony_ci else if (cachep->size > 1024) 373462306a36Sopenharmony_ci limit = 24; 373562306a36Sopenharmony_ci else if (cachep->size > 256) 373662306a36Sopenharmony_ci limit = 54; 373762306a36Sopenharmony_ci else 373862306a36Sopenharmony_ci limit = 120; 373962306a36Sopenharmony_ci 374062306a36Sopenharmony_ci /* 374162306a36Sopenharmony_ci * CPU bound tasks (e.g. network routing) can exhibit cpu bound 374262306a36Sopenharmony_ci * allocation behaviour: Most allocs on one cpu, most free operations 374362306a36Sopenharmony_ci * on another cpu. For these cases, an efficient object passing between 374462306a36Sopenharmony_ci * cpus is necessary. This is provided by a shared array. The array 374562306a36Sopenharmony_ci * replaces Bonwick's magazine layer. 374662306a36Sopenharmony_ci * On uniprocessor, it's functionally equivalent (but less efficient) 374762306a36Sopenharmony_ci * to a larger limit. Thus disabled by default. 
374862306a36Sopenharmony_ci */ 374962306a36Sopenharmony_ci shared = 0; 375062306a36Sopenharmony_ci if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1) 375162306a36Sopenharmony_ci shared = 8; 375262306a36Sopenharmony_ci 375362306a36Sopenharmony_ci#if DEBUG 375462306a36Sopenharmony_ci /* 375562306a36Sopenharmony_ci * With debugging enabled, large batchcount lead to excessively long 375662306a36Sopenharmony_ci * periods with disabled local interrupts. Limit the batchcount 375762306a36Sopenharmony_ci */ 375862306a36Sopenharmony_ci if (limit > 32) 375962306a36Sopenharmony_ci limit = 32; 376062306a36Sopenharmony_ci#endif 376162306a36Sopenharmony_ci batchcount = (limit + 1) / 2; 376262306a36Sopenharmony_ci err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp); 376362306a36Sopenharmony_ciend: 376462306a36Sopenharmony_ci if (err) 376562306a36Sopenharmony_ci pr_err("enable_cpucache failed for %s, error %d\n", 376662306a36Sopenharmony_ci cachep->name, -err); 376762306a36Sopenharmony_ci return err; 376862306a36Sopenharmony_ci} 376962306a36Sopenharmony_ci 377062306a36Sopenharmony_ci/* 377162306a36Sopenharmony_ci * Drain an array if it contains any elements taking the node lock only if 377262306a36Sopenharmony_ci * necessary. Note that the node listlock also protects the array_cache 377362306a36Sopenharmony_ci * if drain_array() is used on the shared array. 377462306a36Sopenharmony_ci */ 377562306a36Sopenharmony_cistatic void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, 377662306a36Sopenharmony_ci struct array_cache *ac, int node) 377762306a36Sopenharmony_ci{ 377862306a36Sopenharmony_ci LIST_HEAD(list); 377962306a36Sopenharmony_ci 378062306a36Sopenharmony_ci /* ac from n->shared can be freed if we don't hold the slab_mutex. 
*/ 378162306a36Sopenharmony_ci check_mutex_acquired(); 378262306a36Sopenharmony_ci 378362306a36Sopenharmony_ci if (!ac || !ac->avail) 378462306a36Sopenharmony_ci return; 378562306a36Sopenharmony_ci 378662306a36Sopenharmony_ci if (ac->touched) { 378762306a36Sopenharmony_ci ac->touched = 0; 378862306a36Sopenharmony_ci return; 378962306a36Sopenharmony_ci } 379062306a36Sopenharmony_ci 379162306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 379262306a36Sopenharmony_ci drain_array_locked(cachep, ac, node, false, &list); 379362306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 379462306a36Sopenharmony_ci 379562306a36Sopenharmony_ci slabs_destroy(cachep, &list); 379662306a36Sopenharmony_ci} 379762306a36Sopenharmony_ci 379862306a36Sopenharmony_ci/** 379962306a36Sopenharmony_ci * cache_reap - Reclaim memory from caches. 380062306a36Sopenharmony_ci * @w: work descriptor 380162306a36Sopenharmony_ci * 380262306a36Sopenharmony_ci * Called from workqueue/eventd every few seconds. 380362306a36Sopenharmony_ci * Purpose: 380462306a36Sopenharmony_ci * - clear the per-cpu caches for this CPU. 380562306a36Sopenharmony_ci * - return freeable pages to the main free memory pool. 380662306a36Sopenharmony_ci * 380762306a36Sopenharmony_ci * If we cannot acquire the cache chain mutex then just give up - we'll try 380862306a36Sopenharmony_ci * again on the next iteration. 380962306a36Sopenharmony_ci */ 381062306a36Sopenharmony_cistatic void cache_reap(struct work_struct *w) 381162306a36Sopenharmony_ci{ 381262306a36Sopenharmony_ci struct kmem_cache *searchp; 381362306a36Sopenharmony_ci struct kmem_cache_node *n; 381462306a36Sopenharmony_ci int node = numa_mem_id(); 381562306a36Sopenharmony_ci struct delayed_work *work = to_delayed_work(w); 381662306a36Sopenharmony_ci 381762306a36Sopenharmony_ci if (!mutex_trylock(&slab_mutex)) 381862306a36Sopenharmony_ci /* Give up. Setup the next iteration. 
*/ 381962306a36Sopenharmony_ci goto out; 382062306a36Sopenharmony_ci 382162306a36Sopenharmony_ci list_for_each_entry(searchp, &slab_caches, list) { 382262306a36Sopenharmony_ci check_irq_on(); 382362306a36Sopenharmony_ci 382462306a36Sopenharmony_ci /* 382562306a36Sopenharmony_ci * We only take the node lock if absolutely necessary and we 382662306a36Sopenharmony_ci * have established with reasonable certainty that 382762306a36Sopenharmony_ci * we can do some work if the lock was obtained. 382862306a36Sopenharmony_ci */ 382962306a36Sopenharmony_ci n = get_node(searchp, node); 383062306a36Sopenharmony_ci 383162306a36Sopenharmony_ci reap_alien(searchp, n); 383262306a36Sopenharmony_ci 383362306a36Sopenharmony_ci drain_array(searchp, n, cpu_cache_get(searchp), node); 383462306a36Sopenharmony_ci 383562306a36Sopenharmony_ci /* 383662306a36Sopenharmony_ci * These are racy checks but it does not matter 383762306a36Sopenharmony_ci * if we skip one check or scan twice. 383862306a36Sopenharmony_ci */ 383962306a36Sopenharmony_ci if (time_after(n->next_reap, jiffies)) 384062306a36Sopenharmony_ci goto next; 384162306a36Sopenharmony_ci 384262306a36Sopenharmony_ci n->next_reap = jiffies + REAPTIMEOUT_NODE; 384362306a36Sopenharmony_ci 384462306a36Sopenharmony_ci drain_array(searchp, n, n->shared, node); 384562306a36Sopenharmony_ci 384662306a36Sopenharmony_ci if (n->free_touched) 384762306a36Sopenharmony_ci n->free_touched = 0; 384862306a36Sopenharmony_ci else { 384962306a36Sopenharmony_ci int freed; 385062306a36Sopenharmony_ci 385162306a36Sopenharmony_ci freed = drain_freelist(searchp, n, (n->free_limit + 385262306a36Sopenharmony_ci 5 * searchp->num - 1) / (5 * searchp->num)); 385362306a36Sopenharmony_ci STATS_ADD_REAPED(searchp, freed); 385462306a36Sopenharmony_ci } 385562306a36Sopenharmony_cinext: 385662306a36Sopenharmony_ci cond_resched(); 385762306a36Sopenharmony_ci } 385862306a36Sopenharmony_ci check_irq_on(); 385962306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 
386062306a36Sopenharmony_ci next_reap_node(); 386162306a36Sopenharmony_ciout: 386262306a36Sopenharmony_ci /* Set up the next iteration */ 386362306a36Sopenharmony_ci schedule_delayed_work_on(smp_processor_id(), work, 386462306a36Sopenharmony_ci round_jiffies_relative(REAPTIMEOUT_AC)); 386562306a36Sopenharmony_ci} 386662306a36Sopenharmony_ci 386762306a36Sopenharmony_civoid get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) 386862306a36Sopenharmony_ci{ 386962306a36Sopenharmony_ci unsigned long active_objs, num_objs, active_slabs; 387062306a36Sopenharmony_ci unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0; 387162306a36Sopenharmony_ci unsigned long free_slabs = 0; 387262306a36Sopenharmony_ci int node; 387362306a36Sopenharmony_ci struct kmem_cache_node *n; 387462306a36Sopenharmony_ci 387562306a36Sopenharmony_ci for_each_kmem_cache_node(cachep, node, n) { 387662306a36Sopenharmony_ci check_irq_on(); 387762306a36Sopenharmony_ci raw_spin_lock_irq(&n->list_lock); 387862306a36Sopenharmony_ci 387962306a36Sopenharmony_ci total_slabs += n->total_slabs; 388062306a36Sopenharmony_ci free_slabs += n->free_slabs; 388162306a36Sopenharmony_ci free_objs += n->free_objects; 388262306a36Sopenharmony_ci 388362306a36Sopenharmony_ci if (n->shared) 388462306a36Sopenharmony_ci shared_avail += n->shared->avail; 388562306a36Sopenharmony_ci 388662306a36Sopenharmony_ci raw_spin_unlock_irq(&n->list_lock); 388762306a36Sopenharmony_ci } 388862306a36Sopenharmony_ci num_objs = total_slabs * cachep->num; 388962306a36Sopenharmony_ci active_slabs = total_slabs - free_slabs; 389062306a36Sopenharmony_ci active_objs = num_objs - free_objs; 389162306a36Sopenharmony_ci 389262306a36Sopenharmony_ci sinfo->active_objs = active_objs; 389362306a36Sopenharmony_ci sinfo->num_objs = num_objs; 389462306a36Sopenharmony_ci sinfo->active_slabs = active_slabs; 389562306a36Sopenharmony_ci sinfo->num_slabs = total_slabs; 389662306a36Sopenharmony_ci sinfo->shared_avail = shared_avail; 
389762306a36Sopenharmony_ci sinfo->limit = cachep->limit; 389862306a36Sopenharmony_ci sinfo->batchcount = cachep->batchcount; 389962306a36Sopenharmony_ci sinfo->shared = cachep->shared; 390062306a36Sopenharmony_ci sinfo->objects_per_slab = cachep->num; 390162306a36Sopenharmony_ci sinfo->cache_order = cachep->gfporder; 390262306a36Sopenharmony_ci} 390362306a36Sopenharmony_ci 390462306a36Sopenharmony_civoid slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep) 390562306a36Sopenharmony_ci{ 390662306a36Sopenharmony_ci#if STATS 390762306a36Sopenharmony_ci { /* node stats */ 390862306a36Sopenharmony_ci unsigned long high = cachep->high_mark; 390962306a36Sopenharmony_ci unsigned long allocs = cachep->num_allocations; 391062306a36Sopenharmony_ci unsigned long grown = cachep->grown; 391162306a36Sopenharmony_ci unsigned long reaped = cachep->reaped; 391262306a36Sopenharmony_ci unsigned long errors = cachep->errors; 391362306a36Sopenharmony_ci unsigned long max_freeable = cachep->max_freeable; 391462306a36Sopenharmony_ci unsigned long node_allocs = cachep->node_allocs; 391562306a36Sopenharmony_ci unsigned long node_frees = cachep->node_frees; 391662306a36Sopenharmony_ci unsigned long overflows = cachep->node_overflow; 391762306a36Sopenharmony_ci 391862306a36Sopenharmony_ci seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu", 391962306a36Sopenharmony_ci allocs, high, grown, 392062306a36Sopenharmony_ci reaped, errors, max_freeable, node_allocs, 392162306a36Sopenharmony_ci node_frees, overflows); 392262306a36Sopenharmony_ci } 392362306a36Sopenharmony_ci /* cpu stats */ 392462306a36Sopenharmony_ci { 392562306a36Sopenharmony_ci unsigned long allochit = atomic_read(&cachep->allochit); 392662306a36Sopenharmony_ci unsigned long allocmiss = atomic_read(&cachep->allocmiss); 392762306a36Sopenharmony_ci unsigned long freehit = atomic_read(&cachep->freehit); 392862306a36Sopenharmony_ci unsigned long freemiss = atomic_read(&cachep->freemiss); 
392962306a36Sopenharmony_ci 393062306a36Sopenharmony_ci seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", 393162306a36Sopenharmony_ci allochit, allocmiss, freehit, freemiss); 393262306a36Sopenharmony_ci } 393362306a36Sopenharmony_ci#endif 393462306a36Sopenharmony_ci} 393562306a36Sopenharmony_ci 393662306a36Sopenharmony_ci#define MAX_SLABINFO_WRITE 128 393762306a36Sopenharmony_ci/** 393862306a36Sopenharmony_ci * slabinfo_write - Tuning for the slab allocator 393962306a36Sopenharmony_ci * @file: unused 394062306a36Sopenharmony_ci * @buffer: user buffer 394162306a36Sopenharmony_ci * @count: data length 394262306a36Sopenharmony_ci * @ppos: unused 394362306a36Sopenharmony_ci * 394462306a36Sopenharmony_ci * Return: %0 on success, negative error code otherwise. 394562306a36Sopenharmony_ci */ 394662306a36Sopenharmony_cissize_t slabinfo_write(struct file *file, const char __user *buffer, 394762306a36Sopenharmony_ci size_t count, loff_t *ppos) 394862306a36Sopenharmony_ci{ 394962306a36Sopenharmony_ci char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; 395062306a36Sopenharmony_ci int limit, batchcount, shared, res; 395162306a36Sopenharmony_ci struct kmem_cache *cachep; 395262306a36Sopenharmony_ci 395362306a36Sopenharmony_ci if (count > MAX_SLABINFO_WRITE) 395462306a36Sopenharmony_ci return -EINVAL; 395562306a36Sopenharmony_ci if (copy_from_user(&kbuf, buffer, count)) 395662306a36Sopenharmony_ci return -EFAULT; 395762306a36Sopenharmony_ci kbuf[MAX_SLABINFO_WRITE] = '\0'; 395862306a36Sopenharmony_ci 395962306a36Sopenharmony_ci tmp = strchr(kbuf, ' '); 396062306a36Sopenharmony_ci if (!tmp) 396162306a36Sopenharmony_ci return -EINVAL; 396262306a36Sopenharmony_ci *tmp = '\0'; 396362306a36Sopenharmony_ci tmp++; 396462306a36Sopenharmony_ci if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3) 396562306a36Sopenharmony_ci return -EINVAL; 396662306a36Sopenharmony_ci 396762306a36Sopenharmony_ci /* Find the cache in the chain of caches. 
*/ 396862306a36Sopenharmony_ci mutex_lock(&slab_mutex); 396962306a36Sopenharmony_ci res = -EINVAL; 397062306a36Sopenharmony_ci list_for_each_entry(cachep, &slab_caches, list) { 397162306a36Sopenharmony_ci if (!strcmp(cachep->name, kbuf)) { 397262306a36Sopenharmony_ci if (limit < 1 || batchcount < 1 || 397362306a36Sopenharmony_ci batchcount > limit || shared < 0) { 397462306a36Sopenharmony_ci res = 0; 397562306a36Sopenharmony_ci } else { 397662306a36Sopenharmony_ci res = do_tune_cpucache(cachep, limit, 397762306a36Sopenharmony_ci batchcount, shared, 397862306a36Sopenharmony_ci GFP_KERNEL); 397962306a36Sopenharmony_ci } 398062306a36Sopenharmony_ci break; 398162306a36Sopenharmony_ci } 398262306a36Sopenharmony_ci } 398362306a36Sopenharmony_ci mutex_unlock(&slab_mutex); 398462306a36Sopenharmony_ci if (res >= 0) 398562306a36Sopenharmony_ci res = count; 398662306a36Sopenharmony_ci return res; 398762306a36Sopenharmony_ci} 398862306a36Sopenharmony_ci 398962306a36Sopenharmony_ci#ifdef CONFIG_HARDENED_USERCOPY 399062306a36Sopenharmony_ci/* 399162306a36Sopenharmony_ci * Rejects incorrectly sized objects and objects that are to be copied 399262306a36Sopenharmony_ci * to/from userspace but do not fall entirely within the containing slab 399362306a36Sopenharmony_ci * cache's usercopy region. 399462306a36Sopenharmony_ci * 399562306a36Sopenharmony_ci * Returns NULL if check passes, otherwise const char * to name of cache 399662306a36Sopenharmony_ci * to indicate an error. 
399762306a36Sopenharmony_ci */ 399862306a36Sopenharmony_civoid __check_heap_object(const void *ptr, unsigned long n, 399962306a36Sopenharmony_ci const struct slab *slab, bool to_user) 400062306a36Sopenharmony_ci{ 400162306a36Sopenharmony_ci struct kmem_cache *cachep; 400262306a36Sopenharmony_ci unsigned int objnr; 400362306a36Sopenharmony_ci unsigned long offset; 400462306a36Sopenharmony_ci 400562306a36Sopenharmony_ci ptr = kasan_reset_tag(ptr); 400662306a36Sopenharmony_ci 400762306a36Sopenharmony_ci /* Find and validate object. */ 400862306a36Sopenharmony_ci cachep = slab->slab_cache; 400962306a36Sopenharmony_ci objnr = obj_to_index(cachep, slab, (void *)ptr); 401062306a36Sopenharmony_ci BUG_ON(objnr >= cachep->num); 401162306a36Sopenharmony_ci 401262306a36Sopenharmony_ci /* Find offset within object. */ 401362306a36Sopenharmony_ci if (is_kfence_address(ptr)) 401462306a36Sopenharmony_ci offset = ptr - kfence_object_start(ptr); 401562306a36Sopenharmony_ci else 401662306a36Sopenharmony_ci offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep); 401762306a36Sopenharmony_ci 401862306a36Sopenharmony_ci /* Allow address range falling entirely within usercopy region. */ 401962306a36Sopenharmony_ci if (offset >= cachep->useroffset && 402062306a36Sopenharmony_ci offset - cachep->useroffset <= cachep->usersize && 402162306a36Sopenharmony_ci n <= cachep->useroffset - offset + cachep->usersize) 402262306a36Sopenharmony_ci return; 402362306a36Sopenharmony_ci 402462306a36Sopenharmony_ci usercopy_abort("SLAB object", cachep->name, to_user, offset, n); 402562306a36Sopenharmony_ci} 402662306a36Sopenharmony_ci#endif /* CONFIG_HARDENED_USERCOPY */ 4027