18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * mm/percpu.c - percpu memory allocator 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2009 SUSE Linux Products GmbH 68c2ecf20Sopenharmony_ci * Copyright (C) 2009 Tejun Heo <tj@kernel.org> 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright (C) 2017 Facebook Inc. 98c2ecf20Sopenharmony_ci * Copyright (C) 2017 Dennis Zhou <dennis@kernel.org> 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The percpu allocator handles both static and dynamic areas. Percpu 128c2ecf20Sopenharmony_ci * areas are allocated in chunks which are divided into units. There is 138c2ecf20Sopenharmony_ci * a 1-to-1 mapping for units to possible cpus. These units are grouped 148c2ecf20Sopenharmony_ci * based on NUMA properties of the machine. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * c0 c1 c2 178c2ecf20Sopenharmony_ci * ------------------- ------------------- ------------ 188c2ecf20Sopenharmony_ci * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u 198c2ecf20Sopenharmony_ci * ------------------- ...... ------------------- .... ------------ 208c2ecf20Sopenharmony_ci * 218c2ecf20Sopenharmony_ci * Allocation is done by offsets into a unit's address space. Ie., an 228c2ecf20Sopenharmony_ci * area of 512 bytes at 6k in c1 occupies 512 bytes at 6k in c1:u0, 238c2ecf20Sopenharmony_ci * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear 248c2ecf20Sopenharmony_ci * and even sparse. Access is handled by configuring percpu base 258c2ecf20Sopenharmony_ci * registers according to the cpu to unit mappings and offsetting the 268c2ecf20Sopenharmony_ci * base address using pcpu_unit_size. 
278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * There is special consideration for the first chunk which must handle 298c2ecf20Sopenharmony_ci * the static percpu variables in the kernel image as allocation services 308c2ecf20Sopenharmony_ci * are not online yet. In short, the first chunk is structured like so: 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci * <Static | [Reserved] | Dynamic> 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * The static data is copied from the original section managed by the 358c2ecf20Sopenharmony_ci * linker. The reserved section, if non-zero, primarily manages static 368c2ecf20Sopenharmony_ci * percpu variables from kernel modules. Finally, the dynamic section 378c2ecf20Sopenharmony_ci * takes care of normal allocations. 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * The allocator organizes chunks into lists according to free size and 408c2ecf20Sopenharmony_ci * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT 418c2ecf20Sopenharmony_ci * flag should be passed. All memcg-aware allocations are sharing one set 428c2ecf20Sopenharmony_ci * of chunks and all unaccounted allocations and allocations performed 438c2ecf20Sopenharmony_ci * by processes belonging to the root memory cgroup are using the second set. 448c2ecf20Sopenharmony_ci * 458c2ecf20Sopenharmony_ci * The allocator tries to allocate from the fullest chunk first. Each chunk 468c2ecf20Sopenharmony_ci * is managed by a bitmap with metadata blocks. The allocation map is updated 478c2ecf20Sopenharmony_ci * on every allocation and free to reflect the current state while the boundary 488c2ecf20Sopenharmony_ci * map is only updated on allocation. Each metadata block contains 498c2ecf20Sopenharmony_ci * information to help mitigate the need to iterate over large portions 508c2ecf20Sopenharmony_ci * of the bitmap. The reverse mapping from page to chunk is stored in 518c2ecf20Sopenharmony_ci * the page's index. 
Lastly, units are lazily backed and grow in unison. 528c2ecf20Sopenharmony_ci * 538c2ecf20Sopenharmony_ci * There is a unique conversion that goes on here between bytes and bits. 548c2ecf20Sopenharmony_ci * Each bit represents a fragment of size PCPU_MIN_ALLOC_SIZE. The chunk 558c2ecf20Sopenharmony_ci * tracks the number of pages it is responsible for in nr_pages. Helper 568c2ecf20Sopenharmony_ci * functions are used to convert from between the bytes, bits, and blocks. 578c2ecf20Sopenharmony_ci * All hints are managed in bits unless explicitly stated. 588c2ecf20Sopenharmony_ci * 598c2ecf20Sopenharmony_ci * To use this allocator, arch code should do the following: 608c2ecf20Sopenharmony_ci * 618c2ecf20Sopenharmony_ci * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate 628c2ecf20Sopenharmony_ci * regular address to percpu pointer and back if they need to be 638c2ecf20Sopenharmony_ci * different from the default 648c2ecf20Sopenharmony_ci * 658c2ecf20Sopenharmony_ci * - use pcpu_setup_first_chunk() during percpu area initialization to 668c2ecf20Sopenharmony_ci * setup the first chunk containing the kernel static percpu area 678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci#include <linux/bitmap.h> 728c2ecf20Sopenharmony_ci#include <linux/memblock.h> 738c2ecf20Sopenharmony_ci#include <linux/err.h> 748c2ecf20Sopenharmony_ci#include <linux/lcm.h> 758c2ecf20Sopenharmony_ci#include <linux/list.h> 768c2ecf20Sopenharmony_ci#include <linux/log2.h> 778c2ecf20Sopenharmony_ci#include <linux/mm.h> 788c2ecf20Sopenharmony_ci#include <linux/module.h> 798c2ecf20Sopenharmony_ci#include <linux/mutex.h> 808c2ecf20Sopenharmony_ci#include <linux/percpu.h> 818c2ecf20Sopenharmony_ci#include <linux/pfn.h> 828c2ecf20Sopenharmony_ci#include <linux/slab.h> 838c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 848c2ecf20Sopenharmony_ci#include 
<linux/vmalloc.h> 858c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 868c2ecf20Sopenharmony_ci#include <linux/kmemleak.h> 878c2ecf20Sopenharmony_ci#include <linux/sched.h> 888c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 898c2ecf20Sopenharmony_ci#include <linux/memcontrol.h> 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci#include <asm/cacheflush.h> 928c2ecf20Sopenharmony_ci#include <asm/sections.h> 938c2ecf20Sopenharmony_ci#include <asm/tlbflush.h> 948c2ecf20Sopenharmony_ci#include <asm/io.h> 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 978c2ecf20Sopenharmony_ci#include <trace/events/percpu.h> 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci#include "percpu-internal.h" 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci/* the slots are sorted by free bytes left, 1-31 bytes share the same slot */ 1028c2ecf20Sopenharmony_ci#define PCPU_SLOT_BASE_SHIFT 5 1038c2ecf20Sopenharmony_ci/* chunks in slots below this are subject to being sidelined on failed alloc */ 1048c2ecf20Sopenharmony_ci#define PCPU_SLOT_FAIL_THRESHOLD 3 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci#define PCPU_EMPTY_POP_PAGES_LOW 2 1078c2ecf20Sopenharmony_ci#define PCPU_EMPTY_POP_PAGES_HIGH 4 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 1108c2ecf20Sopenharmony_ci/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ 1118c2ecf20Sopenharmony_ci#ifndef __addr_to_pcpu_ptr 1128c2ecf20Sopenharmony_ci#define __addr_to_pcpu_ptr(addr) \ 1138c2ecf20Sopenharmony_ci (void __percpu *)((unsigned long)(addr) - \ 1148c2ecf20Sopenharmony_ci (unsigned long)pcpu_base_addr + \ 1158c2ecf20Sopenharmony_ci (unsigned long)__per_cpu_start) 1168c2ecf20Sopenharmony_ci#endif 1178c2ecf20Sopenharmony_ci#ifndef __pcpu_ptr_to_addr 1188c2ecf20Sopenharmony_ci#define __pcpu_ptr_to_addr(ptr) \ 1198c2ecf20Sopenharmony_ci (void __force *)((unsigned long)(ptr) + \ 1208c2ecf20Sopenharmony_ci (unsigned long)pcpu_base_addr - \ 
1218c2ecf20Sopenharmony_ci (unsigned long)__per_cpu_start) 1228c2ecf20Sopenharmony_ci#endif 1238c2ecf20Sopenharmony_ci#else /* CONFIG_SMP */ 1248c2ecf20Sopenharmony_ci/* on UP, it's always identity mapped */ 1258c2ecf20Sopenharmony_ci#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) 1268c2ecf20Sopenharmony_ci#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) 1278c2ecf20Sopenharmony_ci#endif /* CONFIG_SMP */ 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistatic int pcpu_unit_pages __ro_after_init; 1308c2ecf20Sopenharmony_cistatic int pcpu_unit_size __ro_after_init; 1318c2ecf20Sopenharmony_cistatic int pcpu_nr_units __ro_after_init; 1328c2ecf20Sopenharmony_cistatic int pcpu_atom_size __ro_after_init; 1338c2ecf20Sopenharmony_ciint pcpu_nr_slots __ro_after_init; 1348c2ecf20Sopenharmony_cistatic size_t pcpu_chunk_struct_size __ro_after_init; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci/* cpus with the lowest and highest unit addresses */ 1378c2ecf20Sopenharmony_cistatic unsigned int pcpu_low_unit_cpu __ro_after_init; 1388c2ecf20Sopenharmony_cistatic unsigned int pcpu_high_unit_cpu __ro_after_init; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci/* the address of the first chunk which starts with the kernel static area */ 1418c2ecf20Sopenharmony_civoid *pcpu_base_addr __ro_after_init; 1428c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pcpu_base_addr); 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_cistatic const int *pcpu_unit_map __ro_after_init; /* cpu -> unit */ 1458c2ecf20Sopenharmony_ciconst unsigned long *pcpu_unit_offsets __ro_after_init; /* cpu -> unit offset */ 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci/* group information, used for vm allocation */ 1488c2ecf20Sopenharmony_cistatic int pcpu_nr_groups __ro_after_init; 1498c2ecf20Sopenharmony_cistatic const unsigned long *pcpu_group_offsets __ro_after_init; 1508c2ecf20Sopenharmony_cistatic const size_t *pcpu_group_sizes __ro_after_init; 1518c2ecf20Sopenharmony_ci 
1528c2ecf20Sopenharmony_ci/* 1538c2ecf20Sopenharmony_ci * The first chunk which always exists. Note that unlike other 1548c2ecf20Sopenharmony_ci * chunks, this one can be allocated and mapped in several different 1558c2ecf20Sopenharmony_ci * ways and thus often doesn't live in the vmalloc area. 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_cistruct pcpu_chunk *pcpu_first_chunk __ro_after_init; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci/* 1608c2ecf20Sopenharmony_ci * Optional reserved chunk. This chunk reserves part of the first 1618c2ecf20Sopenharmony_ci * chunk and serves it for reserved allocations. When the reserved 1628c2ecf20Sopenharmony_ci * region doesn't exist, the following variable is NULL. 1638c2ecf20Sopenharmony_ci */ 1648c2ecf20Sopenharmony_cistruct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ciDEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ 1678c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_cistruct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci/* chunks which need their map areas extended, protected by pcpu_lock */ 1728c2ecf20Sopenharmony_cistatic LIST_HEAD(pcpu_map_extend_chunks); 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci/* 1758c2ecf20Sopenharmony_ci * The number of empty populated pages by chunk type, protected by pcpu_lock. 1768c2ecf20Sopenharmony_ci * The reserved chunk doesn't contribute to the count. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ciint pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci/* 1818c2ecf20Sopenharmony_ci * The number of populated pages in use by the allocator, protected by 1828c2ecf20Sopenharmony_ci * pcpu_lock. This number is kept per a unit per chunk (i.e. 
when a page gets 1838c2ecf20Sopenharmony_ci * allocated/deallocated, it is allocated/deallocated in all units of a chunk 1848c2ecf20Sopenharmony_ci * and increments/decrements this count by 1). 1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_cistatic unsigned long pcpu_nr_populated; 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci/* 1898c2ecf20Sopenharmony_ci * Balance work is used to populate or destroy chunks asynchronously. We 1908c2ecf20Sopenharmony_ci * try to keep the number of populated free pages between 1918c2ecf20Sopenharmony_ci * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one 1928c2ecf20Sopenharmony_ci * empty chunk. 1938c2ecf20Sopenharmony_ci */ 1948c2ecf20Sopenharmony_cistatic void pcpu_balance_workfn(struct work_struct *work); 1958c2ecf20Sopenharmony_cistatic DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); 1968c2ecf20Sopenharmony_cistatic bool pcpu_async_enabled __read_mostly; 1978c2ecf20Sopenharmony_cistatic bool pcpu_atomic_alloc_failed; 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_cistatic void pcpu_schedule_balance_work(void) 2008c2ecf20Sopenharmony_ci{ 2018c2ecf20Sopenharmony_ci if (pcpu_async_enabled) 2028c2ecf20Sopenharmony_ci schedule_work(&pcpu_balance_work); 2038c2ecf20Sopenharmony_ci} 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci/** 2068c2ecf20Sopenharmony_ci * pcpu_addr_in_chunk - check if the address is served from this chunk 2078c2ecf20Sopenharmony_ci * @chunk: chunk of interest 2088c2ecf20Sopenharmony_ci * @addr: percpu address 2098c2ecf20Sopenharmony_ci * 2108c2ecf20Sopenharmony_ci * RETURNS: 2118c2ecf20Sopenharmony_ci * True if the address is served from this chunk. 
2128c2ecf20Sopenharmony_ci */ 2138c2ecf20Sopenharmony_cistatic bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci void *start_addr, *end_addr; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci if (!chunk) 2188c2ecf20Sopenharmony_ci return false; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci start_addr = chunk->base_addr + chunk->start_offset; 2218c2ecf20Sopenharmony_ci end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE - 2228c2ecf20Sopenharmony_ci chunk->end_offset; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci return addr >= start_addr && addr < end_addr; 2258c2ecf20Sopenharmony_ci} 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_cistatic int __pcpu_size_to_slot(int size) 2288c2ecf20Sopenharmony_ci{ 2298c2ecf20Sopenharmony_ci int highbit = fls(size); /* size is in bytes */ 2308c2ecf20Sopenharmony_ci return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic int pcpu_size_to_slot(int size) 2348c2ecf20Sopenharmony_ci{ 2358c2ecf20Sopenharmony_ci if (size == pcpu_unit_size) 2368c2ecf20Sopenharmony_ci return pcpu_nr_slots - 1; 2378c2ecf20Sopenharmony_ci return __pcpu_size_to_slot(size); 2388c2ecf20Sopenharmony_ci} 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_cistatic int pcpu_chunk_slot(const struct pcpu_chunk *chunk) 2418c2ecf20Sopenharmony_ci{ 2428c2ecf20Sopenharmony_ci const struct pcpu_block_md *chunk_md = &chunk->chunk_md; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || 2458c2ecf20Sopenharmony_ci chunk_md->contig_hint == 0) 2468c2ecf20Sopenharmony_ci return 0; 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci return pcpu_size_to_slot(chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE); 2498c2ecf20Sopenharmony_ci} 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci/* set the pointer to a chunk in a page struct */ 
2528c2ecf20Sopenharmony_cistatic void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci page->index = (unsigned long)pcpu; 2558c2ecf20Sopenharmony_ci} 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci/* obtain pointer to a chunk from a page struct */ 2588c2ecf20Sopenharmony_cistatic struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci return (struct pcpu_chunk *)page->index; 2618c2ecf20Sopenharmony_ci} 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_cistatic int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx) 2648c2ecf20Sopenharmony_ci{ 2658c2ecf20Sopenharmony_ci return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; 2668c2ecf20Sopenharmony_ci} 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_cistatic unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT); 2718c2ecf20Sopenharmony_ci} 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_cistatic unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, 2748c2ecf20Sopenharmony_ci unsigned int cpu, int page_idx) 2758c2ecf20Sopenharmony_ci{ 2768c2ecf20Sopenharmony_ci return (unsigned long)chunk->base_addr + 2778c2ecf20Sopenharmony_ci pcpu_unit_page_offset(cpu, page_idx); 2788c2ecf20Sopenharmony_ci} 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_ci/* 2818c2ecf20Sopenharmony_ci * The following are helper functions to help access bitmaps and convert 2828c2ecf20Sopenharmony_ci * between bitmap offsets to address offsets. 
2838c2ecf20Sopenharmony_ci */ 2848c2ecf20Sopenharmony_cistatic unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci return chunk->alloc_map + 2878c2ecf20Sopenharmony_ci (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG); 2888c2ecf20Sopenharmony_ci} 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_cistatic unsigned long pcpu_off_to_block_index(int off) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci return off / PCPU_BITMAP_BLOCK_BITS; 2938c2ecf20Sopenharmony_ci} 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_cistatic unsigned long pcpu_off_to_block_off(int off) 2968c2ecf20Sopenharmony_ci{ 2978c2ecf20Sopenharmony_ci return off & (PCPU_BITMAP_BLOCK_BITS - 1); 2988c2ecf20Sopenharmony_ci} 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_cistatic unsigned long pcpu_block_off_to_off(int index, int off) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci return index * PCPU_BITMAP_BLOCK_BITS + off; 3038c2ecf20Sopenharmony_ci} 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci/* 3068c2ecf20Sopenharmony_ci * pcpu_next_hint - determine which hint to use 3078c2ecf20Sopenharmony_ci * @block: block of interest 3088c2ecf20Sopenharmony_ci * @alloc_bits: size of allocation 3098c2ecf20Sopenharmony_ci * 3108c2ecf20Sopenharmony_ci * This determines if we should scan based on the scan_hint or first_free. 3118c2ecf20Sopenharmony_ci * In general, we want to scan from first_free to fulfill allocations by 3128c2ecf20Sopenharmony_ci * first fit. However, if we know a scan_hint at position scan_hint_start 3138c2ecf20Sopenharmony_ci * cannot fulfill an allocation, we can begin scanning from there knowing 3148c2ecf20Sopenharmony_ci * the contig_hint will be our fallback. 
3158c2ecf20Sopenharmony_ci */ 3168c2ecf20Sopenharmony_cistatic int pcpu_next_hint(struct pcpu_block_md *block, int alloc_bits) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci /* 3198c2ecf20Sopenharmony_ci * The three conditions below determine if we can skip past the 3208c2ecf20Sopenharmony_ci * scan_hint. First, does the scan hint exist. Second, is the 3218c2ecf20Sopenharmony_ci * contig_hint after the scan_hint (possibly not true iff 3228c2ecf20Sopenharmony_ci * contig_hint == scan_hint). Third, is the allocation request 3238c2ecf20Sopenharmony_ci * larger than the scan_hint. 3248c2ecf20Sopenharmony_ci */ 3258c2ecf20Sopenharmony_ci if (block->scan_hint && 3268c2ecf20Sopenharmony_ci block->contig_hint_start > block->scan_hint_start && 3278c2ecf20Sopenharmony_ci alloc_bits > block->scan_hint) 3288c2ecf20Sopenharmony_ci return block->scan_hint_start + block->scan_hint; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci return block->first_free; 3318c2ecf20Sopenharmony_ci} 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci/** 3348c2ecf20Sopenharmony_ci * pcpu_next_md_free_region - finds the next hint free area 3358c2ecf20Sopenharmony_ci * @chunk: chunk of interest 3368c2ecf20Sopenharmony_ci * @bit_off: chunk offset 3378c2ecf20Sopenharmony_ci * @bits: size of free area 3388c2ecf20Sopenharmony_ci * 3398c2ecf20Sopenharmony_ci * Helper function for pcpu_for_each_md_free_region. It checks 3408c2ecf20Sopenharmony_ci * block->contig_hint and performs aggregation across blocks to find the 3418c2ecf20Sopenharmony_ci * next hint. It modifies bit_off and bits in-place to be consumed in the 3428c2ecf20Sopenharmony_ci * loop. 
3438c2ecf20Sopenharmony_ci */ 3448c2ecf20Sopenharmony_cistatic void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off, 3458c2ecf20Sopenharmony_ci int *bits) 3468c2ecf20Sopenharmony_ci{ 3478c2ecf20Sopenharmony_ci int i = pcpu_off_to_block_index(*bit_off); 3488c2ecf20Sopenharmony_ci int block_off = pcpu_off_to_block_off(*bit_off); 3498c2ecf20Sopenharmony_ci struct pcpu_block_md *block; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci *bits = 0; 3528c2ecf20Sopenharmony_ci for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); 3538c2ecf20Sopenharmony_ci block++, i++) { 3548c2ecf20Sopenharmony_ci /* handles contig area across blocks */ 3558c2ecf20Sopenharmony_ci if (*bits) { 3568c2ecf20Sopenharmony_ci *bits += block->left_free; 3578c2ecf20Sopenharmony_ci if (block->left_free == PCPU_BITMAP_BLOCK_BITS) 3588c2ecf20Sopenharmony_ci continue; 3598c2ecf20Sopenharmony_ci return; 3608c2ecf20Sopenharmony_ci } 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci /* 3638c2ecf20Sopenharmony_ci * This checks three things. First is there a contig_hint to 3648c2ecf20Sopenharmony_ci * check. Second, have we checked this hint before by 3658c2ecf20Sopenharmony_ci * comparing the block_off. Third, is this the same as the 3668c2ecf20Sopenharmony_ci * right contig hint. In the last case, it spills over into 3678c2ecf20Sopenharmony_ci * the next block and should be handled by the contig area 3688c2ecf20Sopenharmony_ci * across blocks code. 
3698c2ecf20Sopenharmony_ci */ 3708c2ecf20Sopenharmony_ci *bits = block->contig_hint; 3718c2ecf20Sopenharmony_ci if (*bits && block->contig_hint_start >= block_off && 3728c2ecf20Sopenharmony_ci *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) { 3738c2ecf20Sopenharmony_ci *bit_off = pcpu_block_off_to_off(i, 3748c2ecf20Sopenharmony_ci block->contig_hint_start); 3758c2ecf20Sopenharmony_ci return; 3768c2ecf20Sopenharmony_ci } 3778c2ecf20Sopenharmony_ci /* reset to satisfy the second predicate above */ 3788c2ecf20Sopenharmony_ci block_off = 0; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci *bits = block->right_free; 3818c2ecf20Sopenharmony_ci *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; 3828c2ecf20Sopenharmony_ci } 3838c2ecf20Sopenharmony_ci} 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci/** 3868c2ecf20Sopenharmony_ci * pcpu_next_fit_region - finds fit areas for a given allocation request 3878c2ecf20Sopenharmony_ci * @chunk: chunk of interest 3888c2ecf20Sopenharmony_ci * @alloc_bits: size of allocation 3898c2ecf20Sopenharmony_ci * @align: alignment of area (max PAGE_SIZE) 3908c2ecf20Sopenharmony_ci * @bit_off: chunk offset 3918c2ecf20Sopenharmony_ci * @bits: size of free area 3928c2ecf20Sopenharmony_ci * 3938c2ecf20Sopenharmony_ci * Finds the next free region that is viable for use with a given size and 3948c2ecf20Sopenharmony_ci * alignment. This only returns if there is a valid area to be used for this 3958c2ecf20Sopenharmony_ci * allocation. block->first_free is returned if the allocation request fits 3968c2ecf20Sopenharmony_ci * within the block to see if the request can be fulfilled prior to the contig 3978c2ecf20Sopenharmony_ci * hint. 
3988c2ecf20Sopenharmony_ci */ 3998c2ecf20Sopenharmony_cistatic void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, 4008c2ecf20Sopenharmony_ci int align, int *bit_off, int *bits) 4018c2ecf20Sopenharmony_ci{ 4028c2ecf20Sopenharmony_ci int i = pcpu_off_to_block_index(*bit_off); 4038c2ecf20Sopenharmony_ci int block_off = pcpu_off_to_block_off(*bit_off); 4048c2ecf20Sopenharmony_ci struct pcpu_block_md *block; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci *bits = 0; 4078c2ecf20Sopenharmony_ci for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); 4088c2ecf20Sopenharmony_ci block++, i++) { 4098c2ecf20Sopenharmony_ci /* handles contig area across blocks */ 4108c2ecf20Sopenharmony_ci if (*bits) { 4118c2ecf20Sopenharmony_ci *bits += block->left_free; 4128c2ecf20Sopenharmony_ci if (*bits >= alloc_bits) 4138c2ecf20Sopenharmony_ci return; 4148c2ecf20Sopenharmony_ci if (block->left_free == PCPU_BITMAP_BLOCK_BITS) 4158c2ecf20Sopenharmony_ci continue; 4168c2ecf20Sopenharmony_ci } 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci /* check block->contig_hint */ 4198c2ecf20Sopenharmony_ci *bits = ALIGN(block->contig_hint_start, align) - 4208c2ecf20Sopenharmony_ci block->contig_hint_start; 4218c2ecf20Sopenharmony_ci /* 4228c2ecf20Sopenharmony_ci * This uses the block offset to determine if this has been 4238c2ecf20Sopenharmony_ci * checked in the prior iteration. 
4248c2ecf20Sopenharmony_ci */ 4258c2ecf20Sopenharmony_ci if (block->contig_hint && 4268c2ecf20Sopenharmony_ci block->contig_hint_start >= block_off && 4278c2ecf20Sopenharmony_ci block->contig_hint >= *bits + alloc_bits) { 4288c2ecf20Sopenharmony_ci int start = pcpu_next_hint(block, alloc_bits); 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci *bits += alloc_bits + block->contig_hint_start - 4318c2ecf20Sopenharmony_ci start; 4328c2ecf20Sopenharmony_ci *bit_off = pcpu_block_off_to_off(i, start); 4338c2ecf20Sopenharmony_ci return; 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci /* reset to satisfy the second predicate above */ 4368c2ecf20Sopenharmony_ci block_off = 0; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free, 4398c2ecf20Sopenharmony_ci align); 4408c2ecf20Sopenharmony_ci *bits = PCPU_BITMAP_BLOCK_BITS - *bit_off; 4418c2ecf20Sopenharmony_ci *bit_off = pcpu_block_off_to_off(i, *bit_off); 4428c2ecf20Sopenharmony_ci if (*bits >= alloc_bits) 4438c2ecf20Sopenharmony_ci return; 4448c2ecf20Sopenharmony_ci } 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci /* no valid offsets were found - fail condition */ 4478c2ecf20Sopenharmony_ci *bit_off = pcpu_chunk_map_bits(chunk); 4488c2ecf20Sopenharmony_ci} 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci/* 4518c2ecf20Sopenharmony_ci * Metadata free area iterators. These perform aggregation of free areas 4528c2ecf20Sopenharmony_ci * based on the metadata blocks and return the offset @bit_off and size in 4538c2ecf20Sopenharmony_ci * bits of the free area @bits. pcpu_for_each_fit_region only returns when 4548c2ecf20Sopenharmony_ci * a fit is found for the allocation request. 
4558c2ecf20Sopenharmony_ci */ 4568c2ecf20Sopenharmony_ci#define pcpu_for_each_md_free_region(chunk, bit_off, bits) \ 4578c2ecf20Sopenharmony_ci for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits)); \ 4588c2ecf20Sopenharmony_ci (bit_off) < pcpu_chunk_map_bits((chunk)); \ 4598c2ecf20Sopenharmony_ci (bit_off) += (bits) + 1, \ 4608c2ecf20Sopenharmony_ci pcpu_next_md_free_region((chunk), &(bit_off), &(bits))) 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci#define pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) \ 4638c2ecf20Sopenharmony_ci for (pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \ 4648c2ecf20Sopenharmony_ci &(bits)); \ 4658c2ecf20Sopenharmony_ci (bit_off) < pcpu_chunk_map_bits((chunk)); \ 4668c2ecf20Sopenharmony_ci (bit_off) += (bits), \ 4678c2ecf20Sopenharmony_ci pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \ 4688c2ecf20Sopenharmony_ci &(bits))) 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci/** 4718c2ecf20Sopenharmony_ci * pcpu_mem_zalloc - allocate memory 4728c2ecf20Sopenharmony_ci * @size: bytes to allocate 4738c2ecf20Sopenharmony_ci * @gfp: allocation flags 4748c2ecf20Sopenharmony_ci * 4758c2ecf20Sopenharmony_ci * Allocate @size bytes. If @size is smaller than PAGE_SIZE, 4768c2ecf20Sopenharmony_ci * kzalloc() is used; otherwise, the equivalent of vzalloc() is used. 4778c2ecf20Sopenharmony_ci * This is to facilitate passing through whitelisted flags. The 4788c2ecf20Sopenharmony_ci * returned memory is always zeroed. 4798c2ecf20Sopenharmony_ci * 4808c2ecf20Sopenharmony_ci * RETURNS: 4818c2ecf20Sopenharmony_ci * Pointer to the allocated area on success, NULL on failure. 
4828c2ecf20Sopenharmony_ci */ 4838c2ecf20Sopenharmony_cistatic void *pcpu_mem_zalloc(size_t size, gfp_t gfp) 4848c2ecf20Sopenharmony_ci{ 4858c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!slab_is_available())) 4868c2ecf20Sopenharmony_ci return NULL; 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci if (size <= PAGE_SIZE) 4898c2ecf20Sopenharmony_ci return kzalloc(size, gfp); 4908c2ecf20Sopenharmony_ci else 4918c2ecf20Sopenharmony_ci return __vmalloc(size, gfp | __GFP_ZERO); 4928c2ecf20Sopenharmony_ci} 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci/** 4958c2ecf20Sopenharmony_ci * pcpu_mem_free - free memory 4968c2ecf20Sopenharmony_ci * @ptr: memory to free 4978c2ecf20Sopenharmony_ci * 4988c2ecf20Sopenharmony_ci * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). 4998c2ecf20Sopenharmony_ci */ 5008c2ecf20Sopenharmony_cistatic void pcpu_mem_free(void *ptr) 5018c2ecf20Sopenharmony_ci{ 5028c2ecf20Sopenharmony_ci kvfree(ptr); 5038c2ecf20Sopenharmony_ci} 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_cistatic void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot, 5068c2ecf20Sopenharmony_ci bool move_front) 5078c2ecf20Sopenharmony_ci{ 5088c2ecf20Sopenharmony_ci if (chunk != pcpu_reserved_chunk) { 5098c2ecf20Sopenharmony_ci struct list_head *pcpu_slot; 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); 5128c2ecf20Sopenharmony_ci if (move_front) 5138c2ecf20Sopenharmony_ci list_move(&chunk->list, &pcpu_slot[slot]); 5148c2ecf20Sopenharmony_ci else 5158c2ecf20Sopenharmony_ci list_move_tail(&chunk->list, &pcpu_slot[slot]); 5168c2ecf20Sopenharmony_ci } 5178c2ecf20Sopenharmony_ci} 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_cistatic void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot) 5208c2ecf20Sopenharmony_ci{ 5218c2ecf20Sopenharmony_ci __pcpu_chunk_move(chunk, slot, true); 5228c2ecf20Sopenharmony_ci} 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci/** 5258c2ecf20Sopenharmony_ci 
* pcpu_chunk_relocate - put chunk in the appropriate chunk slot 5268c2ecf20Sopenharmony_ci * @chunk: chunk of interest 5278c2ecf20Sopenharmony_ci * @oslot: the previous slot it was on 5288c2ecf20Sopenharmony_ci * 5298c2ecf20Sopenharmony_ci * This function is called after an allocation or free changed @chunk. 5308c2ecf20Sopenharmony_ci * New slot according to the changed state is determined and @chunk is 5318c2ecf20Sopenharmony_ci * moved to the slot. Note that the reserved chunk is never put on 5328c2ecf20Sopenharmony_ci * chunk slots. 5338c2ecf20Sopenharmony_ci * 5348c2ecf20Sopenharmony_ci * CONTEXT: 5358c2ecf20Sopenharmony_ci * pcpu_lock. 5368c2ecf20Sopenharmony_ci */ 5378c2ecf20Sopenharmony_cistatic void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) 5388c2ecf20Sopenharmony_ci{ 5398c2ecf20Sopenharmony_ci int nslot = pcpu_chunk_slot(chunk); 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci if (oslot != nslot) 5428c2ecf20Sopenharmony_ci __pcpu_chunk_move(chunk, nslot, oslot < nslot); 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci/* 5468c2ecf20Sopenharmony_ci * pcpu_update_empty_pages - update empty page counters 5478c2ecf20Sopenharmony_ci * @chunk: chunk of interest 5488c2ecf20Sopenharmony_ci * @nr: nr of empty pages 5498c2ecf20Sopenharmony_ci * 5508c2ecf20Sopenharmony_ci * This is used to keep track of the empty pages now based on the premise 5518c2ecf20Sopenharmony_ci * a md_block covers a page. The hint update functions recognize if a block 5528c2ecf20Sopenharmony_ci * is made full or broken to calculate deltas for keeping track of free pages. 
5538c2ecf20Sopenharmony_ci */ 5548c2ecf20Sopenharmony_cistatic inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) 5558c2ecf20Sopenharmony_ci{ 5568c2ecf20Sopenharmony_ci chunk->nr_empty_pop_pages += nr; 5578c2ecf20Sopenharmony_ci if (chunk != pcpu_reserved_chunk) 5588c2ecf20Sopenharmony_ci pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr; 5598c2ecf20Sopenharmony_ci} 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci/* 5628c2ecf20Sopenharmony_ci * pcpu_region_overlap - determines if two regions overlap 5638c2ecf20Sopenharmony_ci * @a: start of first region, inclusive 5648c2ecf20Sopenharmony_ci * @b: end of first region, exclusive 5658c2ecf20Sopenharmony_ci * @x: start of second region, inclusive 5668c2ecf20Sopenharmony_ci * @y: end of second region, exclusive 5678c2ecf20Sopenharmony_ci * 5688c2ecf20Sopenharmony_ci * This is used to determine if the hint region [a, b) overlaps with the 5698c2ecf20Sopenharmony_ci * allocated region [x, y). 5708c2ecf20Sopenharmony_ci */ 5718c2ecf20Sopenharmony_cistatic inline bool pcpu_region_overlap(int a, int b, int x, int y) 5728c2ecf20Sopenharmony_ci{ 5738c2ecf20Sopenharmony_ci return (a < y) && (x < b); 5748c2ecf20Sopenharmony_ci} 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci/** 5778c2ecf20Sopenharmony_ci * pcpu_block_update - updates a block given a free area 5788c2ecf20Sopenharmony_ci * @block: block of interest 5798c2ecf20Sopenharmony_ci * @start: start offset in block 5808c2ecf20Sopenharmony_ci * @end: end offset in block 5818c2ecf20Sopenharmony_ci * 5828c2ecf20Sopenharmony_ci * Updates a block given a known free area. The region [start, end) is 5838c2ecf20Sopenharmony_ci * expected to be the entirety of the free area within a block. Chooses 5848c2ecf20Sopenharmony_ci * the best starting offset if the contig hints are equal. 
 */
static void pcpu_block_update(struct pcpu_block_md *block, int start, int end)
{
	/* length of the known free run [start, end) */
	int contig = end - start;

	/* track the earliest free bit and the edge-free runs */
	block->first_free = min(block->first_free, start);
	if (start == 0)
		block->left_free = contig;

	if (end == block->nr_bits)
		block->right_free = contig;

	if (contig > block->contig_hint) {
		/* promote the old contig_hint to be the new scan_hint */
		if (start > block->contig_hint_start) {
			if (block->contig_hint > block->scan_hint) {
				block->scan_hint_start =
					block->contig_hint_start;
				block->scan_hint = block->contig_hint;
			} else if (start < block->scan_hint_start) {
				/*
				 * The old contig_hint == scan_hint.  But, the
				 * new contig is larger so hold the invariant
				 * scan_hint_start < contig_hint_start.
				 */
				block->scan_hint = 0;
			}
		} else {
			/* new winner sits before the scan_hint - drop it */
			block->scan_hint = 0;
		}
		block->contig_hint_start = start;
		block->contig_hint = contig;
	} else if (contig == block->contig_hint) {
		if (block->contig_hint_start &&
		    (!start ||
		     __ffs(start) > __ffs(block->contig_hint_start))) {
			/* start has a better alignment so use it */
			block->contig_hint_start = start;
			if (start < block->scan_hint_start &&
			    block->contig_hint > block->scan_hint)
				block->scan_hint = 0;
		} else if (start > block->scan_hint_start ||
			   block->contig_hint > block->scan_hint) {
			/*
			 * Knowing contig == contig_hint, update the scan_hint
			 * if it is farther than or larger than the current
			 * scan_hint.
			 */
			block->scan_hint_start = start;
			block->scan_hint = contig;
		}
	} else {
		/*
		 * The region is smaller than the contig_hint.  So only update
		 * the scan_hint if it is larger than or equal and farther than
		 * the current scan_hint.
		 */
		if ((start < block->contig_hint_start &&
		     (contig > block->scan_hint ||
		      (contig == block->scan_hint &&
		       start > block->scan_hint_start)))) {
			block->scan_hint_start = start;
			block->scan_hint = contig;
		}
	}
}

/*
 * pcpu_block_update_scan - update a block given a free area from a scan
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * Finding the final allocation spot first goes through pcpu_find_block_fit()
 * to find a block that can hold the allocation and then pcpu_alloc_area()
 * where a scan is used.  When allocations require specific alignments,
 * we can inadvertently create holes which will not be seen in the alloc
 * or free paths.
 *
 * This takes a given free area hole and updates a block as it may change the
 * scan_hint.  We need to scan backwards to ensure we don't miss free bits
 * from alignment.
 */
static void pcpu_block_update_scan(struct pcpu_chunk *chunk, int bit_off,
				   int bits)
{
	int s_off = pcpu_off_to_block_off(bit_off);
	int e_off = s_off + bits;
	int s_index, l_bit;
	struct pcpu_block_md *block;

	/* only areas fully contained within a single block are handled here */
	if (e_off > PCPU_BITMAP_BLOCK_BITS)
		return;

	s_index = pcpu_off_to_block_index(bit_off);
	block = chunk->md_blocks + s_index;

	/* scan backwards in case of alignment skipping free bits */
	l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index), s_off);
	s_off = (s_off == l_bit) ? 0 : l_bit + 1;

	pcpu_block_update(block, s_off, e_off);
}

/**
 * pcpu_chunk_refresh_hint - updates metadata about a chunk
 * @chunk: chunk of interest
 * @full_scan: if we should scan from the beginning
 *
 * Iterates over the metadata blocks to find the largest contig area.
 * A full scan can be avoided on the allocation path as this is triggered
 * if we broke the contig_hint.  In doing so, the scan_hint will be before
 * the contig_hint or after if the scan_hint == contig_hint.  This cannot
 * be prevented on freeing as we want to find the largest area possibly
 * spanning blocks.
 */
static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	int bit_off, bits;

	/* promote scan_hint to contig_hint */
	if (!full_scan && chunk_md->scan_hint) {
		/* resume scanning just past the promoted region */
		bit_off = chunk_md->scan_hint_start + chunk_md->scan_hint;
		chunk_md->contig_hint_start = chunk_md->scan_hint_start;
		chunk_md->contig_hint = chunk_md->scan_hint;
		chunk_md->scan_hint = 0;
	} else {
		/* full scan: rebuild contig_hint starting at first_free */
		bit_off = chunk_md->first_free;
		chunk_md->contig_hint = 0;
	}

	bits = 0;
	pcpu_for_each_md_free_region(chunk, bit_off, bits)
		pcpu_block_update(chunk_md, bit_off, bit_off + bits);
}

/**
 * pcpu_block_refresh_hint
 * @chunk: chunk of interest
 * @index: index of the metadata block
 *
 * Scans over the block beginning at first_free and updates the block
 * metadata accordingly.
 */
static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
{
	struct pcpu_block_md *block = chunk->md_blocks + index;
	unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
	unsigned int rs, re, start;	/* region start, region end */

	/* promote scan_hint to contig_hint */
	if (block->scan_hint) {
		/* the scan below starts just past the promoted region */
		start = block->scan_hint_start + block->scan_hint;
		block->contig_hint_start = block->scan_hint_start;
		block->contig_hint = block->scan_hint;
		block->scan_hint = 0;
	} else {
		start = block->first_free;
		block->contig_hint = 0;
	}

	/* right_free is rebuilt by the region walk below */
	block->right_free = 0;

	/* iterate over free areas and update the contig hints */
	bitmap_for_each_clear_region(alloc_map, rs, re, start,
				     PCPU_BITMAP_BLOCK_BITS)
		pcpu_block_update(block, rs, re);
}

/**
 * pcpu_block_update_hint_alloc - update hint on allocation path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the allocation path.  The metadata only has to be
 * refreshed by a full scan iff the chunk's contig hint is broken.
 * Block level scans are required if the block's contig hint is broken.
 */
static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
					 int bits)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	int nr_empty_pages = 0;
	struct pcpu_block_md *s_block, *e_block, *block;
	int s_index, e_index;	/* block indexes of the allocated area */
	int s_off, e_off;	/* block offsets of the allocated area */

	/*
	 * Calculate per block offsets.
	 * The calculation uses an inclusive range, but the resulting offsets
	 * are [start, end).  e_index always points to the last block in the
	 * range.
	 */
	s_index = pcpu_off_to_block_index(bit_off);
	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
	s_off = pcpu_off_to_block_off(bit_off);
	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;

	s_block = chunk->md_blocks + s_index;
	e_block = chunk->md_blocks + e_index;

	/*
	 * Update s_block.
	 * block->first_free must be updated if the allocation takes its place.
	 * If the allocation breaks the contig_hint, a scan is required to
	 * restore this hint.
	 */
	/* a fully free block is an empty page about to lose that status */
	if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
		nr_empty_pages++;

	if (s_off == s_block->first_free)
		s_block->first_free = find_next_zero_bit(
					pcpu_index_alloc_map(chunk, s_index),
					PCPU_BITMAP_BLOCK_BITS,
					s_off + bits);

	/* an allocation overlapping the scan_hint invalidates it */
	if (pcpu_region_overlap(s_block->scan_hint_start,
				s_block->scan_hint_start + s_block->scan_hint,
				s_off,
				s_off + bits))
		s_block->scan_hint = 0;

	if (pcpu_region_overlap(s_block->contig_hint_start,
				s_block->contig_hint_start +
				s_block->contig_hint,
				s_off,
				s_off + bits)) {
		/* block contig hint is broken - scan to fix it */
		if (!s_off)
			s_block->left_free = 0;
		pcpu_block_refresh_hint(chunk, s_index);
	} else {
		/* update left and right contig manually */
		s_block->left_free = min(s_block->left_free, s_off);
		if (s_index == e_index)
			s_block->right_free = min_t(int, s_block->right_free,
					PCPU_BITMAP_BLOCK_BITS - e_off);
		else
			s_block->right_free = 0;
	}

	/*
	 * Update e_block.
	 */
	if (s_index != e_index) {
		if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
			nr_empty_pages++;

		/*
		 * When the allocation is across blocks, the end is along
		 * the left part of the e_block.
		 */
		e_block->first_free = find_next_zero_bit(
				pcpu_index_alloc_map(chunk, e_index),
				PCPU_BITMAP_BLOCK_BITS, e_off);

		if (e_off == PCPU_BITMAP_BLOCK_BITS) {
			/* reset the block */
			e_block++;
		} else {
			if (e_off > e_block->scan_hint_start)
				e_block->scan_hint = 0;

			e_block->left_free = 0;
			if (e_off > e_block->contig_hint_start) {
				/* contig hint is broken - scan to fix it */
				pcpu_block_refresh_hint(chunk, e_index);
			} else {
				e_block->right_free =
					min_t(int, e_block->right_free,
					      PCPU_BITMAP_BLOCK_BITS - e_off);
			}
		}

		/* update in-between md_blocks */
		nr_empty_pages += (e_index - s_index - 1);
		for (block = s_block + 1; block < e_block; block++) {
			block->scan_hint = 0;
			block->contig_hint = 0;
			block->left_free = 0;
			block->right_free = 0;
		}
	}

	/* the allocation consumed formerly-empty pages */
	if (nr_empty_pages)
		pcpu_update_empty_pages(chunk, -nr_empty_pages);

	if (pcpu_region_overlap(chunk_md->scan_hint_start,
				chunk_md->scan_hint_start +
				chunk_md->scan_hint,
				bit_off,
				bit_off + bits))
		chunk_md->scan_hint = 0;

	/*
	 * The only time a full chunk scan is required is if the chunk
	 * contig hint is broken.  Otherwise, it means a smaller space
	 * was used and therefore the chunk contig hint is still correct.
	 */
	if (pcpu_region_overlap(chunk_md->contig_hint_start,
				chunk_md->contig_hint_start +
				chunk_md->contig_hint,
				bit_off,
				bit_off + bits))
		pcpu_chunk_refresh_hint(chunk, false);
}

/**
 * pcpu_block_update_hint_free - updates the block hints on the free path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the free path.  This avoids a blind block
 * refresh by making use of the block contig hints.  If this fails, it scans
 * forward and backward to determine the extent of the free area.  This is
 * capped at the boundary of blocks.
 *
 * A chunk update is triggered if a page becomes free, a block becomes free,
 * or the free spans across blocks.  This tradeoff is to minimize iterating
 * over the block metadata to update chunk_md->contig_hint.
 * chunk_md->contig_hint may be off by up to a page, but it will never be more
 * than the available space.  If the contig hint is contained in one block, it
 * will be accurate.
 */
static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
					int bits)
{
	int nr_empty_pages = 0;
	struct pcpu_block_md *s_block, *e_block, *block;
	int s_index, e_index;	/* block indexes of the freed allocation */
	int s_off, e_off;	/* block offsets of the freed allocation */
	int start, end;		/* start and end of the whole free area */

	/*
	 * Calculate per block offsets.
	 * The calculation uses an inclusive range, but the resulting offsets
	 * are [start, end).  e_index always points to the last block in the
	 * range.
	 */
	s_index = pcpu_off_to_block_index(bit_off);
	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
	s_off = pcpu_off_to_block_off(bit_off);
	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;

	s_block = chunk->md_blocks + s_index;
	e_block = chunk->md_blocks + e_index;

	/*
	 * Check if the freed area aligns with the block->contig_hint.
	 * If it does, then the scan to find the beginning/end of the
	 * larger free area can be avoided.
	 *
	 * start and end refer to beginning and end of the free area
	 * within each their respective blocks.  This is not necessarily
	 * the entire free area as it may span blocks past the beginning
	 * or end of the block.
	 */
	start = s_off;
	if (s_off == s_block->contig_hint + s_block->contig_hint_start) {
		/* the free area abuts the contig_hint - merge them */
		start = s_block->contig_hint_start;
	} else {
		/*
		 * Scan backwards to find the extent of the free area.
		 * find_last_bit returns the starting bit, so if the start bit
		 * is returned, that means there was no last bit and the
		 * remainder of the chunk is free.
		 */
		int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index),
					  start);
		start = (start == l_bit) ? 0 : l_bit + 1;
	}

	end = e_off;
	if (e_off == e_block->contig_hint_start)
		/* the free area abuts the following contig_hint - merge */
		end = e_block->contig_hint_start + e_block->contig_hint;
	else
		end = find_next_bit(pcpu_index_alloc_map(chunk, e_index),
				    PCPU_BITMAP_BLOCK_BITS, end);

	/* update s_block */
	e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
	if (!start && e_off == PCPU_BITMAP_BLOCK_BITS)
		nr_empty_pages++;
	pcpu_block_update(s_block, start, e_off);

	/* the free spans blocks - e_block and middle blocks need updating */
	if (s_index != e_index) {
		/* update e_block */
		if (end == PCPU_BITMAP_BLOCK_BITS)
			nr_empty_pages++;
		pcpu_block_update(e_block, 0, end);

		/* reset md_blocks in the middle */
		nr_empty_pages += (e_index - s_index - 1);
		for (block = s_block + 1; block < e_block; block++) {
			block->first_free = 0;
			block->scan_hint = 0;
			block->contig_hint_start = 0;
			block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
			block->left_free = PCPU_BITMAP_BLOCK_BITS;
			block->right_free = PCPU_BITMAP_BLOCK_BITS;
		}
	}

	if (nr_empty_pages)
		pcpu_update_empty_pages(chunk, nr_empty_pages);

	/*
	 * Refresh chunk metadata when the free makes a block free or spans
	 * across blocks.  The contig_hint may be off by up to a page, but if
	 * the contig_hint is contained in a block, it will be accurate with
	 * the else condition below.
	 */
	if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index)
		pcpu_chunk_refresh_hint(chunk, true);
	else
		pcpu_block_update(&chunk->chunk_md,
				  pcpu_block_off_to_off(s_index, start),
				  end);
}

/**
 * pcpu_is_populated - determines if the region is populated
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of area
 * @next_off: return value for the next offset to start searching
 *
 * For atomic allocations, check if the backing pages are populated.
 *
 * RETURNS:
 * Bool if the backing pages are populated.
 * next_index is to skip over unpopulated blocks in pcpu_find_block_fit.
10218c2ecf20Sopenharmony_ci */ 10228c2ecf20Sopenharmony_cistatic bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits, 10238c2ecf20Sopenharmony_ci int *next_off) 10248c2ecf20Sopenharmony_ci{ 10258c2ecf20Sopenharmony_ci unsigned int page_start, page_end, rs, re; 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_ci page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); 10288c2ecf20Sopenharmony_ci page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_ci rs = page_start; 10318c2ecf20Sopenharmony_ci bitmap_next_clear_region(chunk->populated, &rs, &re, page_end); 10328c2ecf20Sopenharmony_ci if (rs >= page_end) 10338c2ecf20Sopenharmony_ci return true; 10348c2ecf20Sopenharmony_ci 10358c2ecf20Sopenharmony_ci *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; 10368c2ecf20Sopenharmony_ci return false; 10378c2ecf20Sopenharmony_ci} 10388c2ecf20Sopenharmony_ci 10398c2ecf20Sopenharmony_ci/** 10408c2ecf20Sopenharmony_ci * pcpu_find_block_fit - finds the block index to start searching 10418c2ecf20Sopenharmony_ci * @chunk: chunk of interest 10428c2ecf20Sopenharmony_ci * @alloc_bits: size of request in allocation units 10438c2ecf20Sopenharmony_ci * @align: alignment of area (max PAGE_SIZE bytes) 10448c2ecf20Sopenharmony_ci * @pop_only: use populated regions only 10458c2ecf20Sopenharmony_ci * 10468c2ecf20Sopenharmony_ci * Given a chunk and an allocation spec, find the offset to begin searching 10478c2ecf20Sopenharmony_ci * for a free region. This iterates over the bitmap metadata blocks to 10488c2ecf20Sopenharmony_ci * find an offset that will be guaranteed to fit the requirements. It is 10498c2ecf20Sopenharmony_ci * not quite first fit as if the allocation does not fit in the contig hint 10508c2ecf20Sopenharmony_ci * of a block or chunk, it is skipped. This errs on the side of caution 10518c2ecf20Sopenharmony_ci * to prevent excess iteration. 
 * Poor alignment can cause the allocator to
 * skip over blocks and chunks that have valid free areas.
 *
 * RETURNS:
 * The offset in the bitmap to begin searching.
 * -1 if no offset is found.
 */
static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
			       size_t align, bool pop_only)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	int bit_off, bits, next_off;

	/*
	 * Check to see if the allocation can fit in the chunk's contig hint.
	 * This is an optimization to prevent scanning by assuming if it
	 * cannot fit in the global hint, there is memory pressure and creating
	 * a new chunk would happen soon.
	 */
	bit_off = ALIGN(chunk_md->contig_hint_start, align) -
		  chunk_md->contig_hint_start;
	if (bit_off + alloc_bits > chunk_md->contig_hint)
		return -1;

	/* begin at the offset suggested by the chunk-level hints */
	bit_off = pcpu_next_hint(chunk_md, alloc_bits);
	bits = 0;
	pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
		/* for atomic allocations, only populated regions qualify */
		if (!pop_only || pcpu_is_populated(chunk, bit_off, bits,
						   &next_off))
			break;

		bit_off = next_off;
		bits = 0;
	}

	/* the iterator ran off the end of the chunk without finding a fit */
	if (bit_off == pcpu_chunk_map_bits(chunk))
		return -1;

	return bit_off;
}

/*
 * pcpu_find_zero_area - modified from bitmap_find_next_zero_area_off()
 * @map: the address to base the search on
 * @size: the bitmap size in bits
 * @start: the bitnumber to start searching at
 * @nr: the number of zeroed bits we're looking for
 * @align_mask: alignment mask for zero area
 * @largest_off: offset of the largest area skipped
 * @largest_bits: size of the largest area skipped
 *
 * The @align_mask should be one less than a power of 2.
 *
 * This is a modified version of bitmap_find_next_zero_area_off() to remember
 * the largest area that was skipped.  This is imperfect, but in general is
 * good enough.  The largest remembered region is the largest failed region
 * seen.  This does not include anything we possibly skipped due to alignment.
 * pcpu_block_update_scan() does scan backwards to try and recover what was
 * lost to alignment.  While this can cause scanning to miss earlier possible
 * free areas, smaller allocations will eventually fill those holes.
 */
static unsigned long pcpu_find_zero_area(unsigned long *map,
					 unsigned long size,
					 unsigned long start,
					 unsigned long nr,
					 unsigned long align_mask,
					 unsigned long *largest_off,
					 unsigned long *largest_bits)
{
	unsigned long index, end, i, area_off, area_bits;
again:
	index = find_next_zero_bit(map, size, start);

	/* Align allocation */
	index = __ALIGN_MASK(index, align_mask);
	area_off = index;

	/* running past the end of the map signals failure to the caller */
	end = index + nr;
	if (end > size)
		return end;
	i = find_next_bit(map, end, index);
	if (i < end) {
		/* the zero run [area_off, i) was too small to hold @nr bits */
		area_bits = i - area_off;
		/* remember largest unused area with best alignment */
		if (area_bits > *largest_bits ||
		    (area_bits == *largest_bits && *largest_off &&
		     (!area_off || __ffs(area_off) > __ffs(*largest_off)))) {
			*largest_off = area_off;
			*largest_bits = area_bits;
		}

		/* retry just past the set bit that ended this run */
		start = i + 1;
		goto again;
	}
	return index;
}

/**
 * pcpu_alloc_area - allocates an area from a pcpu_chunk
 * @chunk: chunk of interest
 * @alloc_bits: size of request in allocation units
 * @align: alignment of area (max PAGE_SIZE)
 * @start: bit_off to start searching
 *
 * This function takes in a @start offset to begin searching to fit an
 * allocation of @alloc_bits with alignment @align.  It needs to scan
 * the allocation map because if it fits within the block's contig hint,
 * @start will be block->first_free.  This is an attempt to fill the
 * allocation prior to breaking the contig hint.  The allocation and
 * boundary maps are updated accordingly if it confirms a valid
 * free area.
 *
 * RETURNS:
 * Allocated addr offset in @chunk on success.
 * -1 if no matching area is found.
 */
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
			   size_t align, int start)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	size_t align_mask = (align) ? (align - 1) : 0;
	unsigned long area_off = 0, area_bits = 0;
	int bit_off, end, oslot;

	lockdep_assert_held(&pcpu_lock);

	/* remember the old slot so the chunk can be re-sorted afterwards */
	oslot = pcpu_chunk_slot(chunk);

	/*
	 * Search to find a fit.
	 */
	end = min_t(int, start + alloc_bits + PCPU_BITMAP_BLOCK_BITS,
		    pcpu_chunk_map_bits(chunk));
	bit_off = pcpu_find_zero_area(chunk->alloc_map, end, start, alloc_bits,
				      align_mask, &area_off, &area_bits);
	if (bit_off >= end)
		return -1;

	/* feed the largest skipped free region back into the block hints */
	if (area_bits)
		pcpu_block_update_scan(chunk, area_off, area_bits);

	/* update alloc map */
	bitmap_set(chunk->alloc_map, bit_off, alloc_bits);

	/* update boundary map: set bits delimit the start of each allocation */
	set_bit(bit_off, chunk->bound_map);
	bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
	set_bit(bit_off + alloc_bits, chunk->bound_map);

	chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;

	/* update first free bit */
	if (bit_off == chunk_md->first_free)
		chunk_md->first_free = find_next_zero_bit(
					chunk->alloc_map,
					pcpu_chunk_map_bits(chunk),
					bit_off + alloc_bits);

	pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);

	/* hints changed - the chunk may belong in a different size slot now */
	pcpu_chunk_relocate(chunk, oslot);

	return bit_off * PCPU_MIN_ALLOC_SIZE;
}

/**
 * pcpu_free_area - frees the corresponding offset
 * @chunk: chunk of interest
 * @off: addr offset into chunk
 *
 * This function determines the size of an allocation to free using
 * the boundary bitmap and clears the allocation map.
 *
 * RETURNS:
 * Number of freed bytes.
 */
static int pcpu_free_area(struct pcpu_chunk *chunk, int off)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	int bit_off, bits, end, oslot, freed;

	lockdep_assert_held(&pcpu_lock);
	pcpu_stats_area_dealloc(chunk);

	oslot = pcpu_chunk_slot(chunk);

	bit_off = off / PCPU_MIN_ALLOC_SIZE;

	/* find end index: the next boundary bit marks the allocation's end */
	end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
			    bit_off + 1);
	bits = end - bit_off;
	bitmap_clear(chunk->alloc_map, bit_off, bits);

	freed = bits * PCPU_MIN_ALLOC_SIZE;

	/* update metadata */
	chunk->free_bytes += freed;

	/* update first free bit */
	chunk_md->first_free = min(chunk_md->first_free, bit_off);

	pcpu_block_update_hint_free(chunk, bit_off, bits);

	pcpu_chunk_relocate(chunk, oslot);

	return freed;
}

/* Reset a metadata block to describe a fully free region of @nr_bits bits. */
static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits)
{
	block->scan_hint = 0;
	block->contig_hint = nr_bits;
	block->left_free = nr_bits;
	block->right_free = nr_bits;
	block->first_free = 0;
	block->nr_bits = nr_bits;
}

/* Initialize the chunk-wide metadata block and every per-block one. */
static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
{
	struct pcpu_block_md *md_block;

	/* init the chunk's block */
	pcpu_init_md_block(&chunk->chunk_md, pcpu_chunk_map_bits(chunk));

	for (md_block = chunk->md_blocks;
	     md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
	     md_block++)
		pcpu_init_md_block(md_block, PCPU_BITMAP_BLOCK_BITS);
}

/**
 * pcpu_alloc_first_chunk - creates chunks that serve the first chunk
 * @tmp_addr: the start of the region served
 * @map_size: size of the region served
 *
 * This is responsible for creating the chunks that serve the first chunk.  The
 * base_addr is page aligned down of @tmp_addr while the region end is page
 * aligned up.  Offsets are kept track of to determine the region served. All
 * this is done to appease the bitmap allocator in avoiding partial blocks.
 *
 * RETURNS:
 * Chunk serving the region at @tmp_addr of @map_size.
 */
static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
							 int map_size)
{
	struct pcpu_chunk *chunk;
	unsigned long aligned_addr, lcm_align;
	int start_offset, offset_bits, region_size, region_bits;
	size_t alloc_size;

	/* region calculations */
	aligned_addr = tmp_addr & PAGE_MASK;

	/* bytes hidden at the front due to page aligning base_addr down */
	start_offset = tmp_addr - aligned_addr;

	/*
	 * Align the end of the region with the LCM of PAGE_SIZE and
	 * PCPU_BITMAP_BLOCK_SIZE.  One of these constants is a multiple of
	 * the other.
	 */
	lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
	region_size = ALIGN(start_offset + map_size, lcm_align);

	/* allocate chunk */
	alloc_size = struct_size(chunk, populated,
				 BITS_TO_LONGS(region_size >> PAGE_SHIFT));
	chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!chunk)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	INIT_LIST_HEAD(&chunk->list);

	chunk->base_addr = (void *)aligned_addr;
	chunk->start_offset = start_offset;
	chunk->end_offset = region_size - chunk->start_offset - map_size;

	chunk->nr_pages = region_size >> PAGE_SHIFT;
	region_bits = pcpu_chunk_map_bits(chunk);

	alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]);
	chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!chunk->alloc_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	/* bound_map needs one extra bit for the end-of-region boundary */
	alloc_size =
		BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]);
	chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!chunk->bound_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]);
	chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!chunk->md_blocks)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

#ifdef CONFIG_MEMCG_KMEM
	/* first chunk isn't memcg-aware */
	chunk->obj_cgroups = NULL;
#endif
	pcpu_init_md_blocks(chunk);

	/* manage populated page bitmap: the first chunk is fully populated */
	chunk->immutable = true;
	bitmap_fill(chunk->populated, chunk->nr_pages);
	chunk->nr_populated = chunk->nr_pages;
	chunk->nr_empty_pop_pages = chunk->nr_pages;

	chunk->free_bytes = map_size;

	if (chunk->start_offset) {
		/* hide the beginning of the bitmap */
		offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
		bitmap_set(chunk->alloc_map, 0, offset_bits);
		set_bit(0, chunk->bound_map);
		set_bit(offset_bits, chunk->bound_map);

		chunk->chunk_md.first_free = offset_bits;

		pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
	}

	if (chunk->end_offset) {
		/* hide the end of the bitmap */
		offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE;
		bitmap_set(chunk->alloc_map,
			   pcpu_chunk_map_bits(chunk) - offset_bits,
			   offset_bits);
		set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE,
			chunk->bound_map);
		set_bit(region_bits, chunk->bound_map);

		pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk)
					     - offset_bits, offset_bits);
	}

	return chunk;
}

/*
 * Allocate and initialize an empty runtime chunk of @type.  Returns NULL on
 * allocation failure; partially-built state is unwound via the goto chain.
 */
static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp)
{
	struct pcpu_chunk *chunk;
	int region_bits;

	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
	if (!chunk)
		return NULL;

	INIT_LIST_HEAD(&chunk->list);
	chunk->nr_pages = pcpu_unit_pages;
	region_bits = pcpu_chunk_map_bits(chunk);

	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
					   sizeof(chunk->alloc_map[0]), gfp);
	if (!chunk->alloc_map)
		goto alloc_map_fail;

	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
					   sizeof(chunk->bound_map[0]), gfp);
	if (!chunk->bound_map)
		goto bound_map_fail;

	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
					   sizeof(chunk->md_blocks[0]), gfp);
	if (!chunk->md_blocks)
		goto md_blocks_fail;

#ifdef CONFIG_MEMCG_KMEM
	if (pcpu_is_memcg_chunk(type)) {
		chunk->obj_cgroups =
			pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
					sizeof(struct obj_cgroup *), gfp);
		if (!chunk->obj_cgroups)
			goto objcg_fail;
	}
#endif

	pcpu_init_md_blocks(chunk);

	/* init metadata */
	chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;

	return chunk;

#ifdef CONFIG_MEMCG_KMEM
objcg_fail:
	pcpu_mem_free(chunk->md_blocks);
#endif
md_blocks_fail:
	pcpu_mem_free(chunk->bound_map);
bound_map_fail:
	pcpu_mem_free(chunk->alloc_map);
alloc_map_fail:
	pcpu_mem_free(chunk);

	return NULL;
}

/* Free a chunk and all of its bookkeeping allocations.  NULL is a no-op. */
static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;
#ifdef CONFIG_MEMCG_KMEM
	pcpu_mem_free(chunk->obj_cgroups);
#endif
	pcpu_mem_free(chunk->md_blocks);
	pcpu_mem_free(chunk->bound_map);
	pcpu_mem_free(chunk->alloc_map);
	pcpu_mem_free(chunk);
}

/**
 * pcpu_chunk_populated - post-population bookkeeping
 * @chunk: pcpu_chunk which got populated
 * @page_start: the start page
 * @page_end: the end page
 *
 * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
 * the bookkeeping information accordingly.  Must be called after each
 * successful population.
14778c2ecf20Sopenharmony_ci * 14788c2ecf20Sopenharmony_ci * If this is @for_alloc, do not increment pcpu_nr_empty_pop_pages because it 14798c2ecf20Sopenharmony_ci * is to serve an allocation in that area. 14808c2ecf20Sopenharmony_ci */ 14818c2ecf20Sopenharmony_cistatic void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start, 14828c2ecf20Sopenharmony_ci int page_end) 14838c2ecf20Sopenharmony_ci{ 14848c2ecf20Sopenharmony_ci int nr = page_end - page_start; 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_ci lockdep_assert_held(&pcpu_lock); 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_ci bitmap_set(chunk->populated, page_start, nr); 14898c2ecf20Sopenharmony_ci chunk->nr_populated += nr; 14908c2ecf20Sopenharmony_ci pcpu_nr_populated += nr; 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci pcpu_update_empty_pages(chunk, nr); 14938c2ecf20Sopenharmony_ci} 14948c2ecf20Sopenharmony_ci 14958c2ecf20Sopenharmony_ci/** 14968c2ecf20Sopenharmony_ci * pcpu_chunk_depopulated - post-depopulation bookkeeping 14978c2ecf20Sopenharmony_ci * @chunk: pcpu_chunk which got depopulated 14988c2ecf20Sopenharmony_ci * @page_start: the start page 14998c2ecf20Sopenharmony_ci * @page_end: the end page 15008c2ecf20Sopenharmony_ci * 15018c2ecf20Sopenharmony_ci * Pages in [@page_start,@page_end) have been depopulated from @chunk. 15028c2ecf20Sopenharmony_ci * Update the bookkeeping information accordingly. Must be called after 15038c2ecf20Sopenharmony_ci * each successful depopulation. 
15048c2ecf20Sopenharmony_ci */ 15058c2ecf20Sopenharmony_cistatic void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, 15068c2ecf20Sopenharmony_ci int page_start, int page_end) 15078c2ecf20Sopenharmony_ci{ 15088c2ecf20Sopenharmony_ci int nr = page_end - page_start; 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_ci lockdep_assert_held(&pcpu_lock); 15118c2ecf20Sopenharmony_ci 15128c2ecf20Sopenharmony_ci bitmap_clear(chunk->populated, page_start, nr); 15138c2ecf20Sopenharmony_ci chunk->nr_populated -= nr; 15148c2ecf20Sopenharmony_ci pcpu_nr_populated -= nr; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci pcpu_update_empty_pages(chunk, -nr); 15178c2ecf20Sopenharmony_ci} 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci/* 15208c2ecf20Sopenharmony_ci * Chunk management implementation. 15218c2ecf20Sopenharmony_ci * 15228c2ecf20Sopenharmony_ci * To allow different implementations, chunk alloc/free and 15238c2ecf20Sopenharmony_ci * [de]population are implemented in a separate file which is pulled 15248c2ecf20Sopenharmony_ci * into this file and compiled together. The following functions 15258c2ecf20Sopenharmony_ci * should be implemented. 
15268c2ecf20Sopenharmony_ci * 15278c2ecf20Sopenharmony_ci * pcpu_populate_chunk - populate the specified range of a chunk 15288c2ecf20Sopenharmony_ci * pcpu_depopulate_chunk - depopulate the specified range of a chunk 15298c2ecf20Sopenharmony_ci * pcpu_create_chunk - create a new chunk 15308c2ecf20Sopenharmony_ci * pcpu_destroy_chunk - destroy a chunk, always preceded by full depop 15318c2ecf20Sopenharmony_ci * pcpu_addr_to_page - translate address to physical address 15328c2ecf20Sopenharmony_ci * pcpu_verify_alloc_info - check alloc_info is acceptable during init 15338c2ecf20Sopenharmony_ci */ 15348c2ecf20Sopenharmony_cistatic int pcpu_populate_chunk(struct pcpu_chunk *chunk, 15358c2ecf20Sopenharmony_ci int page_start, int page_end, gfp_t gfp); 15368c2ecf20Sopenharmony_cistatic void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, 15378c2ecf20Sopenharmony_ci int page_start, int page_end); 15388c2ecf20Sopenharmony_cistatic struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, 15398c2ecf20Sopenharmony_ci gfp_t gfp); 15408c2ecf20Sopenharmony_cistatic void pcpu_destroy_chunk(struct pcpu_chunk *chunk); 15418c2ecf20Sopenharmony_cistatic struct page *pcpu_addr_to_page(void *addr); 15428c2ecf20Sopenharmony_cistatic int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci#ifdef CONFIG_NEED_PER_CPU_KM 15458c2ecf20Sopenharmony_ci#include "percpu-km.c" 15468c2ecf20Sopenharmony_ci#else 15478c2ecf20Sopenharmony_ci#include "percpu-vm.c" 15488c2ecf20Sopenharmony_ci#endif 15498c2ecf20Sopenharmony_ci 15508c2ecf20Sopenharmony_ci/** 15518c2ecf20Sopenharmony_ci * pcpu_chunk_addr_search - determine chunk containing specified address 15528c2ecf20Sopenharmony_ci * @addr: address for which the chunk needs to be determined. 15538c2ecf20Sopenharmony_ci * 15548c2ecf20Sopenharmony_ci * This is an internal function that handles all but static allocations. 
 * Static percpu address values should never be passed into the allocator.
 *
 * RETURNS:
 * The address of the found chunk.
 */
static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
{
	/* is it in the dynamic region (first chunk)? */
	if (pcpu_addr_in_chunk(pcpu_first_chunk, addr))
		return pcpu_first_chunk;

	/* is it in the reserved region? */
	if (pcpu_addr_in_chunk(pcpu_reserved_chunk, addr))
		return pcpu_reserved_chunk;

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc
	 * space.  Note that any possible cpu id can be used here, so
	 * there's no need to worry about preemption or cpu hotplug.
	 */
	addr += pcpu_unit_offsets[raw_smp_processor_id()];
	return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * Decide which chunk set serves this allocation and pre-charge the current
 * task's obj_cgroup for it.  Returns PCPU_FAIL_ALLOC when the charge is
 * rejected; PCPU_CHUNK_ROOT when the allocation is not memcg-accounted.
 */
static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
						      struct obj_cgroup **objcgp)
{
	struct obj_cgroup *objcg;

	/* only __GFP_ACCOUNT allocations are memcg-aware */
	if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
		return PCPU_CHUNK_ROOT;

	objcg = get_obj_cgroup_from_current();
	if (!objcg)
		return PCPU_CHUNK_ROOT;

	/* charge covers one copy of @size per possible CPU */
	if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
		obj_cgroup_put(objcg);
		return PCPU_FAIL_ALLOC;
	}

	*objcgp = objcg;
	return PCPU_CHUNK_MEMCG;
}

/*
 * Commit (@chunk != NULL) or roll back (@chunk == NULL) the charge taken in
 * pcpu_memcg_pre_alloc_hook().  On commit, the objcg reference is parked in
 * the chunk's obj_cgroups[] slot for the freeing path to find.
 */
static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
				       struct pcpu_chunk *chunk, int off,
				       size_t size)
{
	if (!objcg)
		return;

	if (chunk) {
		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;

		rcu_read_lock();
		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
				size * num_possible_cpus());
		rcu_read_unlock();
	} else {
		/* allocation failed after the charge - undo it */
		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
		obj_cgroup_put(objcg);
	}
}

/*
 * Uncharge and drop the objcg reference stashed for the area at @off when it
 * is freed.  No-op for non-memcg chunks.
 */
static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
{
	struct obj_cgroup *objcg;

	if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)))
		return;

	objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
	chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;

	obj_cgroup_uncharge(objcg, size * num_possible_cpus());

	rcu_read_lock();
	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
			-(size * num_possible_cpus()));
	rcu_read_unlock();

	obj_cgroup_put(objcg);
}

#else /* CONFIG_MEMCG_KMEM */
/* !CONFIG_MEMCG_KMEM stubs: every allocation is served by the root set. */
static enum pcpu_chunk_type
pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
{
	return PCPU_CHUNK_ROOT;
}

static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
				       struct pcpu_chunk *chunk, int off,
				       size_t size)
{
}

static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 * @gfp: allocation flags
 *
 * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't
 * contain %GFP_KERNEL, the allocation is atomic.  If @gfp has __GFP_NOWARN
 * then no warning will be triggered on invalid or failed allocation
 * requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
				 gfp_t gfp)
{
	gfp_t pcpu_gfp;
	bool is_atomic;
	bool do_warn;
	enum pcpu_chunk_type type;
	struct list_head *pcpu_slot;
	struct obj_cgroup *objcg = NULL;
	static int warn_limit = 10;	/* rate-limits failure warnings for the whole system */
	struct pcpu_chunk *chunk, *next;
	const char *err;
	int slot, off, cpu, ret;
	unsigned long flags;
	void __percpu *ptr;
	size_t bits, bit_align;

	gfp = current_gfp_context(gfp);
	/* whitelisted flags that can be passed to the backing allocators */
	pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	/* anything short of full GFP_KERNEL must not sleep below */
	is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
	do_warn = !(gfp & __GFP_NOWARN);

	/*
	 * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
	 * therefore alignment must be a minimum of that many bytes.
	 * An allocation may have internal fragmentation from rounding up
	 * of up to PCPU_MIN_ALLOC_SIZE - 1 bytes.
	 */
	if (unlikely(align < PCPU_MIN_ALLOC_SIZE))
		align = PCPU_MIN_ALLOC_SIZE;

	/* the chunk bitmaps track space in PCPU_MIN_ALLOC_SIZE granules */
	size = ALIGN(size, PCPU_MIN_ALLOC_SIZE);
	bits = size >> PCPU_MIN_ALLOC_SHIFT;
	bit_align = align >> PCPU_MIN_ALLOC_SHIFT;

	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
		     !is_power_of_2(align))) {
		WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
		     size, align);
		return NULL;
	}

	/* picks the memcg-aware or root chunk list depending on gfp/objcg */
	type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg);
	if (unlikely(type == PCPU_FAIL_ALLOC))
		return NULL;
	pcpu_slot = pcpu_chunk_list(type);

	if (!is_atomic) {
		/*
		 * pcpu_balance_workfn() allocates memory under this mutex,
		 * and it may wait for memory reclaim. Allow current task
		 * to become OOM victim, in case of memory pressure.
		 */
		if (gfp & __GFP_NOFAIL) {
			mutex_lock(&pcpu_alloc_mutex);
		} else if (mutex_lock_killable(&pcpu_alloc_mutex)) {
			pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
			return NULL;
		}
	}

	spin_lock_irqsave(&pcpu_lock, flags);

	/* serve reserved allocations from the reserved chunk if available */
	if (reserved && pcpu_reserved_chunk) {
		chunk = pcpu_reserved_chunk;

		off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic);
		if (off < 0) {
			err = "alloc from reserved chunk failed";
			goto fail_unlock;
		}

		off = pcpu_alloc_area(chunk, bits, bit_align, off);
		if (off >= 0)
			goto area_found;

		/* reserved allocations never fall back to normal chunks */
		err = "alloc from reserved chunk failed";
		goto fail_unlock;
	}

restart:
	/* search through normal chunks */
	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
		list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) {
			off = pcpu_find_block_fit(chunk, bits, bit_align,
						  is_atomic);
			if (off < 0) {
				/*
				 * Move misplaced chunks to the head of small
				 * slots so repeated failed fits don't rescan
				 * them first on every allocation.
				 */
				if (slot < PCPU_SLOT_FAIL_THRESHOLD)
					pcpu_chunk_move(chunk, 0);
				continue;
			}

			off = pcpu_alloc_area(chunk, bits, bit_align, off);
			if (off >= 0)
				goto area_found;

		}
	}

	spin_unlock_irqrestore(&pcpu_lock, flags);

	/*
	 * No space left.  Create a new chunk.  We don't want multiple
	 * tasks to create chunks simultaneously.  Serialize and create iff
	 * there's still no empty chunk after grabbing the mutex.
	 */
	if (is_atomic) {
		err = "atomic alloc failed, no space left";
		goto fail;
	}

	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
		chunk = pcpu_create_chunk(type, pcpu_gfp);
		if (!chunk) {
			err = "failed to allocate new chunk";
			goto fail;
		}

		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_chunk_relocate(chunk, -1);
	} else {
		spin_lock_irqsave(&pcpu_lock, flags);
	}

	goto restart;

area_found:
	pcpu_stats_area_alloc(chunk, size);
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* populate if not all pages are already there */
	if (!is_atomic) {
		unsigned int page_start, page_end, rs, re;

		page_start = PFN_DOWN(off);
		page_end = PFN_UP(off + size);

		bitmap_for_each_clear_region(chunk->populated, rs, re,
					     page_start, page_end) {
			WARN_ON(chunk->immutable);

			/* may sleep; pcpu_lock is dropped around this call */
			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);

			spin_lock_irqsave(&pcpu_lock, flags);
			if (ret) {
				pcpu_free_area(chunk, off);
				err = "failed to populate";
				goto fail_unlock;
			}
			pcpu_chunk_populated(chunk, rs, re);
			spin_unlock_irqrestore(&pcpu_lock, flags);
		}

		mutex_unlock(&pcpu_alloc_mutex);
	}

	/* keep a pool of empty populated pages around for atomic allocs */
	if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW)
		pcpu_schedule_balance_work();

	/* clear the areas and return address relative to base address */
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);

	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
	kmemleak_alloc_percpu(ptr, size, gfp);

	trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
			chunk->base_addr, off, ptr);

	pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);

	return ptr;

fail_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);
fail:
	trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);

	if (!is_atomic && do_warn && warn_limit) {
		pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
			size, align, is_atomic, err);
		dump_stack();
		if (!--warn_limit)
			pr_info("limit reached, disable warning\n");
	}
	if (is_atomic) {
		/* see the flag handling in pcpu_balance_workfn() */
		pcpu_atomic_alloc_failed = true;
		pcpu_schedule_balance_work();
	} else {
		mutex_unlock(&pcpu_alloc_mutex);
	}

	pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);

	return NULL;
}

/**
 * __alloc_percpu_gfp - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @gfp: allocation flags
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.
 * If
 * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
 * be called from any context but is a lot more likely to fail. If @gfp
 * has __GFP_NOWARN then no warning will be triggered on invalid or failed
 * allocation requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
{
	/* thin wrapper: reserved=false selects the normal dynamic chunks */
	return pcpu_alloc(size, align, false, gfp);
}
EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);

/**
 * __alloc_percpu - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL).
 */
void __percpu *__alloc_percpu(size_t size, size_t align)
{
	/* sleeping allocation from the normal (non-reserved) chunks */
	return pcpu_alloc(size, align, false, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);

/**
 * __alloc_reserved_percpu - allocate reserved percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align
 * from reserved percpu area if arch has set it up; otherwise,
 * allocation is served from the same dynamic area.  Might sleep.
 * Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
{
	/* reserved=true: serve from the first chunk's reserved region */
	return pcpu_alloc(size, align, true, GFP_KERNEL);
}

/**
 * __pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @type: chunk type
 *
 * Reclaim all fully free chunks except for the first one.  This is also
 * responsible for maintaining the pool of empty populated pages.
 * However,
 * it is possible that this is called when physical memory is scarce causing
 * OOM killer to be triggered.  We should avoid doing so until an actual
 * allocation causes the failure as it is possible that requests can be
 * serviced from already backed regions.
 */
static void __pcpu_balance_workfn(enum pcpu_chunk_type type)
{
	/* gfp flags passed to underlying allocators */
	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
	LIST_HEAD(to_free);
	struct list_head *pcpu_slot = pcpu_chunk_list(type);
	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
	struct pcpu_chunk *chunk, *next;
	int slot, nr_to_pop, ret;

	/*
	 * There's no reason to keep around multiple unused chunks and VM
	 * areas can be scarce.  Destroy all free chunks except for one.
	 *
	 * Lock order: pcpu_alloc_mutex is taken before pcpu_lock, matching
	 * the allocation path.
	 */
	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, free_head, list) {
		WARN_ON(chunk->immutable);

		/* spare the first one */
		if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
			continue;

		list_move(&chunk->list, &to_free);
	}

	spin_unlock_irq(&pcpu_lock);

	/* depopulate and destroy outside pcpu_lock; these calls may sleep */
	list_for_each_entry_safe(chunk, next, &to_free, list) {
		unsigned int rs, re;

		bitmap_for_each_set_region(chunk->populated, rs, re, 0,
					   chunk->nr_pages) {
			pcpu_depopulate_chunk(chunk, rs, re);
			spin_lock_irq(&pcpu_lock);
			pcpu_chunk_depopulated(chunk, rs, re);
			spin_unlock_irq(&pcpu_lock);
		}
		pcpu_destroy_chunk(chunk);
		cond_resched();
	}

	/*
	 * Ensure there are certain number of free populated pages for
	 * atomic allocs.  Fill up from the most packed so that atomic
	 * allocs don't increase fragmentation.  If atomic allocation
	 * failed previously, always populate the maximum amount.  This
	 * should prevent atomic allocs larger than PAGE_SIZE from keeping
	 * failing indefinitely; however, large atomic allocs are not
	 * something we support properly and can be highly unreliable and
	 * inefficient.
	 */
retry_pop:
	if (pcpu_atomic_alloc_failed) {
		nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
		/* best effort anyway, don't worry about synchronization */
		pcpu_atomic_alloc_failed = false;
	} else {
		nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
				  pcpu_nr_empty_pop_pages[type],
				  0, PCPU_EMPTY_POP_PAGES_HIGH);
	}

	for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
		unsigned int nr_unpop = 0, rs, re;

		if (!nr_to_pop)
			break;

		/* find a chunk in this slot with unpopulated pages */
		spin_lock_irq(&pcpu_lock);
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			nr_unpop = chunk->nr_pages - chunk->nr_populated;
			if (nr_unpop)
				break;
		}
		spin_unlock_irq(&pcpu_lock);

		if (!nr_unpop)
			continue;

		/* @chunk can't go away while pcpu_alloc_mutex is held */
		bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
					     chunk->nr_pages) {
			int nr = min_t(int, re - rs, nr_to_pop);

			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
			if (!ret) {
				nr_to_pop -= nr;
				spin_lock_irq(&pcpu_lock);
				pcpu_chunk_populated(chunk, rs, rs + nr);
				spin_unlock_irq(&pcpu_lock);
			} else {
				/* allocation failed; give up for this pass */
				nr_to_pop = 0;
			}

			if (!nr_to_pop)
				break;
		}
	}

	if (nr_to_pop) {
		/* ran out of chunks to populate, create a new one and retry */
		chunk = pcpu_create_chunk(type, gfp);
		if (chunk) {
			spin_lock_irq(&pcpu_lock);
			pcpu_chunk_relocate(chunk, -1);
			spin_unlock_irq(&pcpu_lock);
			goto retry_pop;
		}
	}

	mutex_unlock(&pcpu_alloc_mutex);
}

/**
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
 * Call __pcpu_balance_workfn() for each chunk type.
 */
static void pcpu_balance_workfn(struct work_struct *work)
{
	enum pcpu_chunk_type type;

	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
		__pcpu_balance_workfn(type);
}

/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.  NULL is a no-op.
 *
 * CONTEXT:
 * Can be called from atomic context.
 */
void free_percpu(void __percpu *ptr)
{
	void *addr;
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int size, off;
	bool need_balance = false;
	struct list_head *pcpu_slot;

	if (!ptr)
		return;

	kmemleak_free_percpu(ptr);

	addr = __pcpu_ptr_to_addr(ptr);

	/* irqsave since this may be called from atomic context */
	spin_lock_irqsave(&pcpu_lock, flags);

	chunk = pcpu_chunk_addr_search(addr);
	off = addr - chunk->base_addr;

	size = pcpu_free_area(chunk, off);

	pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));

	pcpu_memcg_free_hook(chunk, off, size);

	/* if there are more than one fully free chunks, wake up grim reaper */
	if (chunk->free_bytes == pcpu_unit_size) {
		struct pcpu_chunk *pos;

		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
			if (pos != chunk) {
				need_balance = true;
				break;
			}
	}

	trace_percpu_free_percpu(chunk->base_addr, off, ptr);

	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* defer reclaim to the workqueue; can't sleep here */
	if (need_balance)
		pcpu_schedule_balance_work();
}
EXPORT_SYMBOL_GPL(free_percpu);

/*
 * Test whether @addr lies in the kernel's static percpu region; if so and
 * @can_addr is non-NULL, also report the canonical (boot cpu) address.
 */
bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
{
#ifdef CONFIG_SMP
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		void *start = per_cpu_ptr(base, cpu);
		void *va = (void *)addr;

		if (va >= start && va < start + static_size) {
			if (can_addr) {
				/* offset within the unit ... */
				*can_addr = (unsigned long) (va - start);
				/* ... rebased onto the boot cpu's copy */
				*can_addr += (unsigned long)
					per_cpu_ptr(base, get_boot_cpu_id());
			}
			return true;
		}
	}
#endif
	/* on UP, can't distinguish from other static vars, always false */
	return false;
}

/**
 * is_kernel_percpu_address - test whether address is from static percpu area
 * @addr: address to test
 *
 * Test whether @addr belongs to in-kernel static percpu area.  Module
 * static percpu areas are not considered.  For those, use
 * is_module_percpu_address().
 *
 * RETURNS:
 * %true if @addr is from in-kernel static percpu area, %false otherwise.
 */
bool is_kernel_percpu_address(unsigned long addr)
{
	return __is_kernel_percpu_address(addr, NULL);
}

/**
 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
 * @addr: the address to be converted to physical address
 *
 * Given @addr which is dereferenceable address obtained via one of
 * percpu access macros, this function translates it into its physical
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
 * percpu allocator has special setup for the first chunk, which currently
 * supports either embedding in linear address space or vmalloc mapping,
 * and, from the second one, the backing allocator (currently either vm or
 * km) provides translation.
 *
 * The addr can be translated simply without checking if it falls into the
 * first chunk. But the current code reflects better how percpu allocator
 * actually works, and the verification can discover both bugs in percpu
 * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
 * code.
 *
 * RETURNS:
 * The physical address for @addr.
 */
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
	unsigned long first_low, first_high;
	unsigned int cpu;

	/*
	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 *
	 * The address check is against full chunk sizes.  pcpu_base_addr
	 * points to the beginning of the first chunk including the
	 * static region.  Assumes good intent as the first chunk may
	 * not be full (ie. < pcpu_unit_pages in size).
	 */
	first_low = (unsigned long)pcpu_base_addr +
		    pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
	first_high = (unsigned long)pcpu_base_addr +
		     pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
	if ((unsigned long)addr >= first_low &&
	    (unsigned long)addr < first_high) {
		/* cheap range hit; confirm against the exact per-cpu units */
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);

			if (addr >= start && addr < start + pcpu_unit_size) {
				in_first_chunk = true;
				break;
			}
		}
	}

	if (in_first_chunk) {
		/* first chunk may be embedded (linear) or vmalloc mapped */
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
			return page_to_phys(vmalloc_to_page(addr)) +
			       offset_in_page(addr);
	} else
		return page_to_phys(pcpu_addr_to_page(addr)) +
		       offset_in_page(addr);
}

/**
 * pcpu_alloc_alloc_info - allocate percpu allocation info
 * @nr_groups: the number of groups
 * @nr_units: the number of units
 *
 * Allocate ai which is large enough for @nr_groups groups containing
 * @nr_units units.
 * The returned ai's groups[0].cpu_map points to the
 * cpu_map array which is long enough for @nr_units and filled with
 * NR_CPUS.  It's the caller's responsibility to initialize cpu_map
 * pointer of other groups.
 *
 * RETURNS:
 * Pointer to the allocated pcpu_alloc_info on success, NULL on
 * failure.
 */
struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
						      int nr_units)
{
	struct pcpu_alloc_info *ai;
	size_t base_size, ai_size;
	void *ptr;
	int unit;

	/* struct + group array, padded so the cpu_map that follows is aligned */
	base_size = ALIGN(struct_size(ai, groups, nr_groups),
			  __alignof__(ai->groups[0].cpu_map[0]));
	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

	ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE);
	if (!ptr)
		return NULL;
	ai = ptr;
	ptr += base_size;

	/* group 0's cpu_map lives directly after the group array */
	ai->groups[0].cpu_map = ptr;

	/* NR_CPUS marks "no cpu mapped" slots for the caller to fill in */
	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;

	ai->nr_groups = nr_groups;
	ai->__ai_size = PFN_ALIGN(ai_size);

	return ai;
}

/**
 * pcpu_free_alloc_info - free percpu allocation info
 * @ai: pcpu_alloc_info to free
 *
 * Free @ai which was allocated by pcpu_alloc_alloc_info().
 */
void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
{
	/* __ai_size recorded the PFN-aligned size at allocation time */
	memblock_free_early(__pa(ai), ai->__ai_size);
}

/**
 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
 * @lvl: loglevel
 * @ai: allocation info to dump
 *
 * Print out information about @ai using loglevel @lvl.
 */
static void pcpu_dump_alloc_info(const char *lvl,
				 const struct pcpu_alloc_info *ai)
{
	int group_width = 1, cpu_width = 1, width;
	char empty_str[] = "--------";
	int alloc = 0, alloc_end = 0;
	int group, v;
	int upa, apl;			/* units per alloc, allocs per line */

	/* width of the widest group number in decimal digits */
	v = ai->nr_groups;
	while (v /= 10)
		group_width++;

	/* width of the widest cpu number in decimal digits */
	v = num_possible_cpus();
	while (v /= 10)
		cpu_width++;
	empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';

	upa = ai->alloc_size / ai->unit_size;
	width = upa * (cpu_width + 1) + group_width + 3;
	apl = rounddown_pow_of_two(max(60 / width, 1));

	printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
	       lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
	       ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);

	for (group = 0; group < ai->nr_groups; group++) {
		const struct pcpu_group_info *gi = &ai->groups[group];
		int unit = 0, unit_end = 0;

		BUG_ON(gi->nr_units % upa);
		for (alloc_end += gi->nr_units / upa;
		     alloc < alloc_end; alloc++) {
			if (!(alloc % apl)) {
				pr_cont("\n");
				printk("%spcpu-alloc: ", lvl);
			}
			pr_cont("[%0*d] ", group_width, group);

			/* print mapped cpus, or the "----" filler for holes */
			for (unit_end += upa; unit < unit_end; unit++)
				if (gi->cpu_map[unit] != NR_CPUS)
					pr_cont("%0*d ",
						cpu_width, gi->cpu_map[unit]);
				else
					pr_cont("%s ", empty_str);
		}
	}
	pr_cont("\n");
}

/**
 * pcpu_setup_first_chunk - initialize the first percpu chunk
 * @ai: pcpu_alloc_info describing how to percpu area is shaped
 * @base_addr: mapped address
 *
 * Initialize the first percpu chunk which contains the kernel static
23538c2ecf20Sopenharmony_ci * percpu area. This function is to be called from arch percpu area 23548c2ecf20Sopenharmony_ci * setup path. 23558c2ecf20Sopenharmony_ci * 23568c2ecf20Sopenharmony_ci * @ai contains all information necessary to initialize the first 23578c2ecf20Sopenharmony_ci * chunk and prime the dynamic percpu allocator. 23588c2ecf20Sopenharmony_ci * 23598c2ecf20Sopenharmony_ci * @ai->static_size is the size of static percpu area. 23608c2ecf20Sopenharmony_ci * 23618c2ecf20Sopenharmony_ci * @ai->reserved_size, if non-zero, specifies the amount of bytes to 23628c2ecf20Sopenharmony_ci * reserve after the static area in the first chunk. This reserves 23638c2ecf20Sopenharmony_ci * the first chunk such that it's available only through reserved 23648c2ecf20Sopenharmony_ci * percpu allocation. This is primarily used to serve module percpu 23658c2ecf20Sopenharmony_ci * static areas on architectures where the addressing model has 23668c2ecf20Sopenharmony_ci * limited offset range for symbol relocations to guarantee module 23678c2ecf20Sopenharmony_ci * percpu symbols fall inside the relocatable range. 23688c2ecf20Sopenharmony_ci * 23698c2ecf20Sopenharmony_ci * @ai->dyn_size determines the number of bytes available for dynamic 23708c2ecf20Sopenharmony_ci * allocation in the first chunk. The area between @ai->static_size + 23718c2ecf20Sopenharmony_ci * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. 23728c2ecf20Sopenharmony_ci * 23738c2ecf20Sopenharmony_ci * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE 23748c2ecf20Sopenharmony_ci * and equal to or larger than @ai->static_size + @ai->reserved_size + 23758c2ecf20Sopenharmony_ci * @ai->dyn_size. 23768c2ecf20Sopenharmony_ci * 23778c2ecf20Sopenharmony_ci * @ai->atom_size is the allocation atom size and used as alignment 23788c2ecf20Sopenharmony_ci * for vm areas. 
23798c2ecf20Sopenharmony_ci * 23808c2ecf20Sopenharmony_ci * @ai->alloc_size is the allocation size and always multiple of 23818c2ecf20Sopenharmony_ci * @ai->atom_size. This is larger than @ai->atom_size if 23828c2ecf20Sopenharmony_ci * @ai->unit_size is larger than @ai->atom_size. 23838c2ecf20Sopenharmony_ci * 23848c2ecf20Sopenharmony_ci * @ai->nr_groups and @ai->groups describe virtual memory layout of 23858c2ecf20Sopenharmony_ci * percpu areas. Units which should be colocated are put into the 23868c2ecf20Sopenharmony_ci * same group. Dynamic VM areas will be allocated according to these 23878c2ecf20Sopenharmony_ci * groupings. If @ai->nr_groups is zero, a single group containing 23888c2ecf20Sopenharmony_ci * all units is assumed. 23898c2ecf20Sopenharmony_ci * 23908c2ecf20Sopenharmony_ci * The caller should have mapped the first chunk at @base_addr and 23918c2ecf20Sopenharmony_ci * copied static data to each unit. 23928c2ecf20Sopenharmony_ci * 23938c2ecf20Sopenharmony_ci * The first chunk will always contain a static and a dynamic region. 23948c2ecf20Sopenharmony_ci * However, the static region is not managed by any chunk. If the first 23958c2ecf20Sopenharmony_ci * chunk also contains a reserved region, it is served by two chunks - 23968c2ecf20Sopenharmony_ci * one for the reserved region and one for the dynamic region. They 23978c2ecf20Sopenharmony_ci * share the same vm, but use offset regions in the area allocation map. 23988c2ecf20Sopenharmony_ci * The chunk serving the dynamic region is circulated in the chunk slots 23998c2ecf20Sopenharmony_ci * and available for dynamic allocation like any other chunk. 
 */
void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
				   void *base_addr)
{
	size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	size_t static_size, dyn_size;
	struct pcpu_chunk *chunk;
	unsigned long *group_offsets;
	size_t *group_sizes;
	unsigned long *unit_off;
	unsigned int cpu;
	int *unit_map;
	int group, unit, i;
	int map_size;
	unsigned long tmp_addr;
	size_t alloc_size;
	enum pcpu_chunk_type type;

/*
 * Boot-time sanity check helper: dump the failing condition and the
 * whole alloc_info before BUG()ing so the config is visible in the log.
 * #undef'd below once input parsing is done.
 */
#define PCPU_SETUP_BUG_ON(cond)	do {					\
	if (unlikely(cond)) {						\
		pr_emerg("failed to initialize, %s\n", #cond);		\
		pr_emerg("cpu_possible_mask=%*pb\n",			\
			 cpumask_pr_args(cpu_possible_mask));		\
		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
		BUG();							\
	}								\
} while (0)

	/* sanity checks */
	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
#ifdef CONFIG_SMP
	PCPU_SETUP_BUG_ON(!ai->static_size);
	PCPU_SETUP_BUG_ON(offset_in_page(__per_cpu_start));
#endif
	PCPU_SETUP_BUG_ON(!base_addr);
	PCPU_SETUP_BUG_ON(offset_in_page(base_addr));
	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
	PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size));
	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
	PCPU_SETUP_BUG_ON(!ai->dyn_size);
	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
	PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
			    IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);

	/* process group information and build config tables accordingly */
	alloc_size = ai->nr_groups * sizeof(group_offsets[0]);
	group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!group_offsets)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	alloc_size = ai->nr_groups * sizeof(group_sizes[0]);
	group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!group_sizes)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	alloc_size = nr_cpu_ids * sizeof(unit_map[0]);
	unit_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!unit_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	alloc_size = nr_cpu_ids * sizeof(unit_off[0]);
	unit_off = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
	if (!unit_off)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      alloc_size);

	/* UINT_MAX marks cpus which haven't been assigned a unit yet */
	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;

	pcpu_low_unit_cpu = NR_CPUS;
	pcpu_high_unit_cpu = NR_CPUS;

	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];

		group_offsets[group] = gi->base_offset;
		group_sizes[group] = gi->nr_units * ai->unit_size;

		for (i = 0; i < gi->nr_units; i++) {
			cpu = gi->cpu_map[i];
			if (cpu == NR_CPUS)
				continue;

			PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids);
			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

			/* determine low/high unit_cpu */
			if (pcpu_low_unit_cpu == NR_CPUS ||
			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
				pcpu_low_unit_cpu = cpu;
			if (pcpu_high_unit_cpu == NR_CPUS ||
			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;

	/* every possible cpu must have been mapped to some unit */
	for_each_possible_cpu(cpu)
		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);

	/* we're done parsing the input, undefine BUG macro and dump config */
#undef PCPU_SETUP_BUG_ON
	pcpu_dump_alloc_info(KERN_DEBUG, ai);

	/* publish the parsed config through the allocator's globals */
	pcpu_nr_groups = ai->nr_groups;
	pcpu_group_offsets = group_offsets;
	pcpu_group_sizes = group_sizes;
	pcpu_unit_map = unit_map;
	pcpu_unit_offsets = unit_off;

	/* determine basic parameters */
	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
	pcpu_atom_size = ai->atom_size;
	pcpu_chunk_struct_size = struct_size(chunk, populated,
					     BITS_TO_LONGS(pcpu_unit_pages));

	pcpu_stats_save_ai(ai);

	/*
	 * Allocate chunk slots.  The additional last slot is for
	 * empty chunks.
	 */
	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
	pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots *
					  sizeof(pcpu_chunk_lists[0]) *
					  PCPU_NR_CHUNK_TYPES,
					  SMP_CACHE_BYTES);
	if (!pcpu_chunk_lists)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) *
		      PCPU_NR_CHUNK_TYPES);

	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
		for (i = 0; i < pcpu_nr_slots; i++)
			INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]);

	/*
	 * The end of the static region needs to be aligned with the
	 * minimum allocation size as this offsets the reserved and
	 * dynamic region.  The first chunk ends page aligned by
	 * expanding the dynamic region, therefore the dynamic region
	 * can be shrunk to compensate while still staying above the
	 * configured sizes.
	 */
	static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE);
	dyn_size = ai->dyn_size - (static_size - ai->static_size);

	/*
	 * Initialize first chunk.
	 * If the reserved_size is non-zero, this initializes the reserved
	 * chunk.  If the reserved_size is zero, the reserved chunk is NULL
	 * and the dynamic region is initialized here.  The first chunk,
	 * pcpu_first_chunk, will always point to the chunk that serves
	 * the dynamic region.
	 */
	tmp_addr = (unsigned long)base_addr + static_size;
	map_size = ai->reserved_size ?: dyn_size;
	chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);

	/* init dynamic chunk if necessary */
	if (ai->reserved_size) {
		pcpu_reserved_chunk = chunk;

		tmp_addr = (unsigned long)base_addr + static_size +
			   ai->reserved_size;
		map_size = dyn_size;
		chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
	}

	/* link the first chunk in */
	pcpu_first_chunk = chunk;
	pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] =
		pcpu_first_chunk->nr_empty_pop_pages;
	pcpu_chunk_relocate(pcpu_first_chunk, -1);

	/* include all regions of the first chunk */
	pcpu_nr_populated += PFN_DOWN(size_sum);

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(base_addr);

	/* we're done */
	pcpu_base_addr = base_addr;
}

#ifdef CONFIG_SMP

/* names of the first-chunk allocators, indexed by enum pcpu_fc */
const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
	[PCPU_FC_AUTO] = "auto",
25998c2ecf20Sopenharmony_ci [PCPU_FC_EMBED] = "embed", 26008c2ecf20Sopenharmony_ci [PCPU_FC_PAGE] = "page", 26018c2ecf20Sopenharmony_ci}; 26028c2ecf20Sopenharmony_ci 26038c2ecf20Sopenharmony_cienum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; 26048c2ecf20Sopenharmony_ci 26058c2ecf20Sopenharmony_cistatic int __init percpu_alloc_setup(char *str) 26068c2ecf20Sopenharmony_ci{ 26078c2ecf20Sopenharmony_ci if (!str) 26088c2ecf20Sopenharmony_ci return -EINVAL; 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci if (0) 26118c2ecf20Sopenharmony_ci /* nada */; 26128c2ecf20Sopenharmony_ci#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK 26138c2ecf20Sopenharmony_ci else if (!strcmp(str, "embed")) 26148c2ecf20Sopenharmony_ci pcpu_chosen_fc = PCPU_FC_EMBED; 26158c2ecf20Sopenharmony_ci#endif 26168c2ecf20Sopenharmony_ci#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK 26178c2ecf20Sopenharmony_ci else if (!strcmp(str, "page")) 26188c2ecf20Sopenharmony_ci pcpu_chosen_fc = PCPU_FC_PAGE; 26198c2ecf20Sopenharmony_ci#endif 26208c2ecf20Sopenharmony_ci else 26218c2ecf20Sopenharmony_ci pr_warn("unknown allocator %s specified\n", str); 26228c2ecf20Sopenharmony_ci 26238c2ecf20Sopenharmony_ci return 0; 26248c2ecf20Sopenharmony_ci} 26258c2ecf20Sopenharmony_ciearly_param("percpu_alloc", percpu_alloc_setup); 26268c2ecf20Sopenharmony_ci 26278c2ecf20Sopenharmony_ci/* 26288c2ecf20Sopenharmony_ci * pcpu_embed_first_chunk() is used by the generic percpu setup. 26298c2ecf20Sopenharmony_ci * Build it if needed by the arch config or the generic setup is going 26308c2ecf20Sopenharmony_ci * to be used. 
 */
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

/* build pcpu_page_first_chunk() iff needed by the arch config */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif

/* pcpu_build_alloc_info() is used by both embed and page first chunk */
#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
/**
 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
 * This function determines grouping of units, their mappings to cpus
 * and other parameters considering needed percpu size, allocation
 * atom size and distances between CPUs.
 *
 * Groups are always multiples of atom size and CPUs which are of
 * LOCAL_DISTANCE both ways are grouped together and share space for
 * units in the same group.  The returned configuration is guaranteed
 * to have CPUs on different nodes on different groups and >=75% usage
 * of allocated virtual address space.
 *
 * RETURNS:
 * On success, pointer to the new allocation_info is returned.  On
 * failure, ERR_PTR value is returned.
 */
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
				size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
	static int group_map[NR_CPUS] __initdata;
	static int group_cnt[NR_CPUS] __initdata;
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	int nr_groups = 1, nr_units = 0;
	size_t size_sum, min_unit_size, alloc_size;
	int upa, max_upa, best_upa;	/* units_per_alloc */
	int last_allocs, group, unit;
	unsigned int cpu, tcpu;
	struct pcpu_alloc_info *ai;
	unsigned int *cpu_map;

	/* this function may be called multiple times */
	memset(group_map, 0, sizeof(group_map));
	memset(group_cnt, 0, sizeof(group_cnt));

	/* calculate size_sum and ensure dyn_size is enough for early alloc */
	size_sum = PFN_ALIGN(static_size + reserved_size +
			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
	dyn_size = size_sum - static_size - reserved_size;

	/*
	 * Determine min_unit_size, alloc_size and max_upa such that
	 * alloc_size is multiple of atom_size and is the smallest
	 * which can accommodate 4k aligned segments which are equal to
	 * or larger than min_unit_size.
	 */
	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	/* determine the maximum # of units that can fit in an allocation */
	alloc_size = roundup(min_unit_size, atom_size);
	upa = alloc_size / min_unit_size;
	/* a valid upa must divide alloc_size into page-aligned units */
	while (alloc_size % upa || (offset_in_page(alloc_size / upa)))
		upa--;
	max_upa = upa;

	/* group cpus according to their proximity */
	for_each_possible_cpu(cpu) {
		group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (cpu == tcpu)
				break;
			/*
			 * A cpu joins the lowest-numbered group whose
			 * existing members are all within LOCAL_DISTANCE
			 * in both directions; otherwise try the next group.
			 */
			if (group_map[tcpu] == group && cpu_distance_fn &&
			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				nr_groups = max(nr_groups, group + 1);
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
	}

	/*
	 * Wasted space is caused by a ratio imbalance of upa to group_cnt.
	 * Expand the unit_size until we use >= 75% of the units allocated.
	 * Related to atom_size, which could be much larger than the unit_size.
	 */
	last_allocs = INT_MAX;
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || (offset_in_page(alloc_size / upa)))
			continue;

		for (group = 0; group < nr_groups; group++) {
			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[group];
		}

		/*
		 * Don't accept if wastage is over 1/3.  The
		 * greater-than comparison ensures upa==1 always
		 * passes the following check.
		 */
		if (wasted > num_possible_cpus() / 3)
			continue;

		/* and then don't consume more memory */
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}
	upa = best_upa;

	/* allocate and fill alloc_info */
	for (group = 0; group < nr_groups; group++)
		nr_units += roundup(group_cnt[group], upa);

	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
	if (!ai)
		return ERR_PTR(-ENOMEM);
	cpu_map = ai->groups[0].cpu_map;

	/* carve the shared cpu_map array up among the groups */
	for (group = 0; group < nr_groups; group++) {
		ai->groups[group].cpu_map = cpu_map;
		cpu_map += roundup(group_cnt[group], upa);
	}

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = alloc_size / upa;
	ai->atom_size = atom_size;
	ai->alloc_size = alloc_size;

	for (group = 0, unit = 0; group < nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];

		/*
		 * Initialize base_offset as if all groups are located
		 * back-to-back.  The caller should update this to
		 * reflect actual allocation.
		 */
		gi->base_offset = unit * ai->unit_size;

		for_each_possible_cpu(cpu)
			if (group_map[cpu] == group)
				gi->cpu_map[gi->nr_units++] = cpu;
		/* pad each group out to a whole number of allocations */
		gi->nr_units = roundup(gi->nr_units, upa);
		unit += gi->nr_units;
	}
	BUG_ON(unit != nr_units);

	return ai;
}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */

#if defined(BUILD_EMBED_FIRST_CHUNK)
/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @alloc_fn: function to allocate percpu page
 * @free_fn: function to free percpu page
 *
 * This is a helper to ease setting up embedded first percpu chunk and
 * can be called where pcpu_setup_first_chunk() is expected.
 *
 * If this function is used to setup the first chunk, it is allocated
 * by calling @alloc_fn and used as-is without being mapped into
 * vmalloc area.
 * Allocations are always whole multiples of @atom_size
 * aligned to @atom_size.
 *
 * This enables the first chunk to piggy back on the linear physical
 * mapping which often uses larger page size.  Please note that this
 * can result in very sparse cpu->unit mapping on NUMA machines thus
 * requiring large vmalloc address space.  Don't use this allocator if
 * vmalloc space is not orders of magnitude larger than distances
 * between node memory addresses (ie. 32bit NUMA machines).
 *
 * @dyn_size specifies the minimum dynamic area size.
 *
 * If the needed size is smaller than the minimum or specified unit
 * size, the leftover is returned using @free_fn.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				  size_t atom_size,
				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				  pcpu_fc_alloc_fn_t alloc_fn,
				  pcpu_fc_free_fn_t free_fn)
{
	void *base = (void *)ULONG_MAX;
	void **areas = NULL;
	struct pcpu_alloc_info *ai;
	size_t size_sum, areas_size;
	unsigned long max_distance;
	int group, i, highest_group, rc = 0;

	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
				   cpu_distance_fn);
	if (IS_ERR(ai))
		return PTR_ERR(ai);

	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

	areas = memblock_alloc(areas_size, SMP_CACHE_BYTES);
	if (!areas) {
		rc = -ENOMEM;
		goto out_free;
	}

	/* allocate, copy and determine base address & max_distance */
	highest_group = 0;
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		unsigned int cpu = NR_CPUS;
		void *ptr;

		/* find the first cpu actually mapped into this group */
		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
			cpu = gi->cpu_map[i];
		BUG_ON(cpu == NR_CPUS);

		/* allocate space for the whole group */
		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
		if (!ptr) {
			rc = -ENOMEM;
			goto out_free_areas;
		}
		/* kmemleak tracks the percpu allocations separately */
		kmemleak_free(ptr);
		areas[group] = ptr;

		/* track the lowest and highest group addresses seen */
		base = min(ptr, base);
		if (ptr > areas[highest_group])
			highest_group = group;
	}
	max_distance = areas[highest_group] - base;
	max_distance += ai->unit_size * ai->groups[highest_group].nr_units;

	/* warn if maximum distance is further than 75% of vmalloc space */
	if (max_distance > VMALLOC_TOTAL * 3 / 4) {
		pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
			max_distance, VMALLOC_TOTAL);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		/* and fail if we have fallback */
		rc = -EINVAL;
		goto out_free_areas;
#endif
	}

	/*
	 * Copy data and free unused parts.  This should happen after all
	 * allocations are complete; otherwise, we may end up with
	 * overlapping groups.
	 */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		void *ptr = areas[group];

		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
			if (gi->cpu_map[i] == NR_CPUS) {
				/* unused unit, free whole */
				free_fn(ptr, ai->unit_size);
				continue;
			}
			/* copy and return the unused part */
			memcpy(ptr, __per_cpu_load, ai->static_size);
			free_fn(ptr + size_sum, ai->unit_size - size_sum);
		}
	}

	/* base address is now known, determine group base offsets */
	for (group = 0; group < ai->nr_groups; group++) {
		ai->groups[group].base_offset = areas[group] - base;
	}

	pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n",
		PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
		ai->dyn_size, ai->unit_size);

	pcpu_setup_first_chunk(ai, base);
	goto out_free;

out_free_areas:
	for (group = 0; group < ai->nr_groups; group++)
		if (areas[group])
			free_fn(areas[group],
				ai->groups[group].nr_units * ai->unit_size);
out_free:
	/* ai and the temporary areas[] table are not needed past setup */
	pcpu_free_alloc_info(ai);
	if (areas)
		memblock_free_early(__pa(areas), areas_size);
	return rc;
}
#endif /* BUILD_EMBED_FIRST_CHUNK */

#ifdef BUILD_PAGE_FIRST_CHUNK
/**
 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
 * @reserved_size: the size of reserved percpu area in bytes
 * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
 * @free_fn: function to free percpu page, always called with PAGE_SIZE
 * @populate_pte_fn: function to populate pte
 *
 * This is a helper to ease setting up page-remapped first percpu
 * chunk and can be called where pcpu_setup_first_chunk() is expected.
 *
 * This is the basic allocator.  Static percpu area is allocated
 * page-by-page into vmalloc area.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_page_first_chunk(size_t reserved_size,
				 pcpu_fc_alloc_fn_t alloc_fn,
				 pcpu_fc_free_fn_t free_fn,
				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
	/* static: vm_area_register_early() keeps a reference past this call */
	static struct vm_struct vm;
	struct pcpu_alloc_info *ai;
	char psize_str[16];
	int unit_pages;
	size_t pages_size;
	struct page **pages;
	int unit, i, j, rc = 0;
	int upa;
	int nr_g0_units;

	/* human-readable page size, for diagnostics only */
	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);

	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
	if (IS_ERR(ai))
		return PTR_ERR(ai);
	/* page-based first chunk never uses NUMA grouping */
	BUG_ON(ai->nr_groups != 1);
	upa = ai->alloc_size/ai->unit_size;	/* units per allocation */
	nr_g0_units = roundup(num_possible_cpus(), upa);
	if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) {
		pcpu_free_alloc_info(ai);
		return -EINVAL;
	}

	unit_pages = ai->unit_size >> PAGE_SHIFT;

	/* unaligned allocations can't be freed, round up to page size */
	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
			       sizeof(pages[0]));
	pages = memblock_alloc(pages_size, SMP_CACHE_BYTES);
	if (!pages)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      pages_size);

	/* allocate pages */
	j = 0;
	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned int cpu = ai->groups[0].cpu_map[unit];
		for (i = 0; i < unit_pages; i++) {
			void *ptr;

			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
			if (!ptr) {
				pr_warn("failed to allocate %s page for cpu%u\n",
					psize_str, cpu);
				goto enomem;
			}
			/* kmemleak tracks the percpu allocations separately */
			kmemleak_free(ptr);
			pages[j++] = virt_to_page(ptr);
		}
	}

	/* allocate vm area, map the pages and copy static data */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * ai->unit_size;
	vm_area_register_early(&vm, PAGE_SIZE);

	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned long unit_addr =
			(unsigned long)vm.addr + unit * ai->unit_size;

		for (i = 0; i < unit_pages; i++)
			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));

		/* pte already populated, the following shouldn't fail */
		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
				      unit_pages);
		if (rc < 0)
			panic("failed to map percpu area, err=%d\n", rc);

		/*
		 * FIXME: Archs with virtual cache should flush local
		 * cache for the linear mapping here - something
		 * equivalent to flush_cache_vmap() on the local cpu.
		 * flush_cache_vmap() can't be used as most supporting
		 * data structures are not set up yet.
		 */

		/* copy static data */
		memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
	}

	/* we're ready, commit */
	pr_info("%d %s pages/cpu s%zu r%zu d%zu\n",
		unit_pages, psize_str, ai->static_size,
		ai->reserved_size, ai->dyn_size);

	pcpu_setup_first_chunk(ai, vm.addr);
	goto out_free_ar;

enomem:
	/* unwind: j pages were successfully allocated before the failure */
	while (--j >= 0)
		free_fn(page_address(pages[j]), PAGE_SIZE);
	rc = -ENOMEM;
out_free_ar:
	memblock_free_early(__pa(pages), pages_size);
	pcpu_free_alloc_info(ai);
	return rc;
}
#endif /* BUILD_PAGE_FIRST_CHUNK */

#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
 * Generic SMP percpu area setup.
 *
 * The embedding helper is used because its behavior closely resembles
 * the original non-dynamic generic percpu area setup.  This is
 * important because many archs have addressing restrictions and might
 * fail if the percpu area is located far away from the previous
 * location.  As an added bonus, in non-NUMA cases, embedding is
 * generally a good idea TLB-wise because percpu area can piggy back
 * on the physical linear memory mapping which uses large page
 * mappings on applicable archs.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

/* default first-chunk allocator: boot memory from above the DMA zone */
static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
				       size_t align)
{
	return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
}

/* default first-chunk free callback, pairs with pcpu_dfl_fc_alloc() */
static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
{
	memblock_free_early(__pa(ptr), size);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	/* per-cpu offset = (first chunk base - static section) + unit offset */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */

#else	/* CONFIG_SMP */

/*
 * UP percpu area setup.
 *
 * UP always uses km-based percpu allocator with identity mapping.
 * Static percpu variables are indistinguishable from the usual static
 * variables and don't require any special preparation.
 */
void __init setup_per_cpu_areas(void)
{
	/* unit must hold at least the minimum unit and the dynamic reserve */
	const size_t unit_size =
		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
					 PERCPU_DYNAMIC_RESERVE));
	struct pcpu_alloc_info *ai;
	void *fc;

	/* one group with one unit: the whole first chunk is one allocation */
	ai = pcpu_alloc_alloc_info(1, 1);
	fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!ai || !fc)
		panic("Failed to allocate memory for percpu areas.");
	/* kmemleak tracks the percpu allocations separately */
	kmemleak_free(fc);

	/* everything is dynamic on UP; no static/reserved split needed */
	ai->dyn_size = unit_size;
	ai->unit_size = unit_size;
	ai->atom_size = unit_size;
	ai->alloc_size = unit_size;
	ai->groups[0].nr_units = 1;
	ai->groups[0].cpu_map[0] = 0;

	pcpu_setup_first_chunk(ai, fc);
	pcpu_free_alloc_info(ai);
}

#endif	/* CONFIG_SMP */

/*
 * pcpu_nr_pages - calculate total number of populated backing pages
 *
 * This reflects the number of pages populated to back chunks.  Metadata is
 * excluded in the number exposed in meminfo as the number of backing pages
 * scales with the number of cpus and can quickly outweigh the memory used for
 * metadata.  It also keeps this calculation nice and simple.
 *
 * RETURNS:
 * Total number of populated backing pages in use by the allocator.
 */
unsigned long pcpu_nr_pages(void)
{
	/* each populated chunk page is replicated once per unit (cpu) */
	return pcpu_nr_populated * pcpu_nr_units;
}

/*
 * Percpu allocator is initialized early during boot when neither slab or
 * workqueue is available.  Plug async management until everything is up
 * and running.
 */
static int __init percpu_enable_async(void)
{
	pcpu_async_enabled = true;
	return 0;
}
subsys_initcall(percpu_enable_async);