// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * MMU context allocation for 64-bit kernels.
 *
 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

/* Single IDA shared by hash contexts and radix PIDs. */
static DEFINE_IDA(mmu_context_ida);

/*
 * Allocate an unused context id in [min_id, max_id] from the shared IDA.
 * Returns the id, or a negative errno on failure.
 */
static int alloc_context_id(int min_id, int max_id)
{
	return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

/*
 * Mark a specific context id as in use (e.g. for ids claimed by firmware
 * or early boot). Warns if the exact id could not be reserved.
 */
void hash__reserve_context_id(int id)
{
	int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

/*
 * Allocate a user context id for the hash MMU. The upper bound depends on
 * whether the CPU supports 68-bit virtual addresses.
 */
int hash__alloc_context_id(void)
{
	unsigned long max;

	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
		max = MAX_USER_CONTEXT;
	else
		max = MAX_USER_CONTEXT_65BIT_VA;

	return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

/*
 * (Re)allocate the context ids in ctx->extended_id[]. Slot 0 always gets a
 * fresh id; other slots are reallocated only if they were already in use.
 * Returns ctx->id (the slot-0 id) on success, or a negative errno, in which
 * case any ids allocated by this call have been freed again.
 */
static int realloc_context_ids(mm_context_t *ctx)
{
	int i, id;

	/*
	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
	 * there wasn't one allocated previously (which happens in the exec
	 * case where ctx is newly allocated).
	 *
	 * We have to be a bit careful here. We must keep the existing ids in
	 * the array, so that we can test if they're non-zero to decide if we
	 * need to allocate a new one. However in case of error we must free the
	 * ids we've allocated but *not* any of the existing ones (or risk a
	 * UAF). That's why we decrement i at the start of the error handling
	 * loop, to skip the id that we just tested but couldn't reallocate.
	 */
	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
		if (i == 0 || ctx->extended_id[i]) {
			id = hash__alloc_context_id();
			if (id < 0)
				goto error;

			ctx->extended_id[i] = id;
		}
	}

	/* The caller expects us to return id */
	return ctx->id;

error:
	for (i--; i >= 0; i--) {
		if (ctx->extended_id[i])
			ida_free(&mmu_context_ida, ctx->extended_id[i]);
	}

	return id;
}

/*
 * Initialize mm->context for a hash-MMU mm: allocate the hash_mm_context
 * (zeroed for exec, copied from the parent for fork) and the context ids.
 * Returns the primary context id, or a negative errno.
 */
static int hash__init_new_context(struct mm_struct *mm)
{
	int index;

	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
					   GFP_KERNEL);
	if (!mm->context.hash_context)
		return -ENOMEM;

	/*
	 * The old code would re-promote on fork, we don't do that when using
	 * slices as it could cause problem promoting slices that have been
	 * forced down to 4K.
	 *
	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 *
	 * We should not be calling init_new_context() on init_mm. Hence a
	 * check against 0 is OK.
	 */
	if (mm->context.id == 0) {
		/* New (exec) mm: start from a clean slate. */
		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
		slice_init_new_context_exec(mm);
	} else {
		/* This is fork. Copy hash_context details from current->mm */
		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
		/*
		 * Inherit subpage prot details if we have one.
		 *
		 * NOTE(review): the memcpy above copied the parent's spt
		 * pointer, which is replaced here by a fresh allocation that
		 * is neither zeroed nor copied from the parent's table —
		 * confirm this is the intended semantics.
		 */
		if (current->mm->context.hash_context->spt) {
			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
								GFP_KERNEL);
			if (!mm->context.hash_context->spt) {
				kfree(mm->context.hash_context);
				return -ENOMEM;
			}
		}
#endif
	}

	index = realloc_context_ids(&mm->context);
	if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
		kfree(mm->context.hash_context->spt);
#endif
		kfree(mm->context.hash_context);
		return index;
	}

	pkey_mm_init(mm);
	return index;
}

/* Per-exec setup for the hash MMU: reset slices and SLB state. */
void hash__setup_new_exec(void)
{
	slice_setup_new_exec();

	slb_setup_new_exec();
}

/*
 * Initialize mm->context for a radix-MMU mm: allocate a PID and publish the
 * process table entry for this mm's page table root.
 * Returns the PID, or a negative errno.
 */
static int radix__init_new_context(struct mm_struct *mm)
{
	unsigned long rts_field;
	int index, max_id;

	max_id = (1 << mmu_pid_bits) - 1;
	index = alloc_context_id(mmu_base_pid, max_id);
	if (index < 0)
		return index;

	/*
	 * set the process table entry,
	 */

	rts_field = radix__get_tree_size();
	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * Order the above store with subsequent update of the PID
	 * register (at which point HW can start loading/caching
	 * the entry) and the corresponding load by the MMU from
	 * the L2 cache.
	 */
	asm volatile("ptesync;isync" : : : "memory");

	/* Radix has no hash context; make that explicit. */
	mm->context.hash_context = NULL;

	return index;
}

/*
 * Arch hook: set up the MMU context for a new mm (fork or exec),
 * dispatching to the radix or hash implementation.
 * Returns 0 on success or a negative errno.
 */
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int index;

	if (radix_enabled())
		index = radix__init_new_context(mm);
	else
		index = hash__init_new_context(mm);

	if (index < 0)
		return index;

	mm->context.id = index;

	mm->context.pte_frag = NULL;
	mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
	mm_iommu_init(mm);
#endif
	atomic_set(&mm->context.active_cpus, 0);
	atomic_set(&mm->context.copros, 0);

	return 0;
}

/* Release a single context id back to the IDA. */
void __destroy_context(int context_id)
{
	ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

/* Free every allocated extended context id and the hash context itself. */
static void destroy_contexts(mm_context_t *ctx)
{
	int index, context_id;

	for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
		context_id = ctx->extended_id[index];
		if (context_id)
			ida_free(&mmu_context_ida, context_id);
	}
	/* kfree(NULL) is a no-op, so this is safe for radix (hash_context == NULL). */
	kfree(ctx->hash_context);
}

/*
 * Drop the references still held by a cached PMD fragment and free the
 * backing page once the last reference is gone.
 */
static void pmd_frag_destroy(void *pmd_frag)
{
	int count;
	struct page *page;

	page = virt_to_page(pmd_frag);
	/* drop all the pending references */
	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
	/* We allow PMD_FRAG_NR fragments from a PMD page */
	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

/* Release any cached PTE/PMD fragments held by this mm. */
static void destroy_pagetable_cache(struct mm_struct *mm)
{
	void *frag;

	frag = mm->context.pte_frag;
	if (frag)
		pte_frag_destroy(frag);

	frag = mm->context.pmd_frag;
	if (frag)
		pmd_frag_destroy(frag);
	return;
}

/* Arch hook: tear down the MMU context when an mm is destroyed. */
void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
	/*
	 * For tasks which were successfully initialized we end up calling
	 * arch_exit_mmap() which clears the process table entry. And
	 * arch_exit_mmap() is called before the required fullmm TLB flush
	 * which does a RIC=2 flush. Hence for an initialized task, we do clear
	 * any cached process table entries.
	 *
	 * The condition below handles the error case during task init. We have
	 * set the process table entry early and if we fail a task
	 * initialization, we need to ensure the process table entry is zeroed.
	 * We need not worry about process table entry caches because the task
	 * never ran with the PID value.
	 */
	if (radix_enabled())
		process_tb[mm->context.id].prtb0 = 0;
	else
		subpage_prot_free(mm);
	destroy_contexts(&mm->context);
	mm->context.id = MMU_NO_CONTEXT;
}

/* Arch hook: called on exit_mmap(), before the final fullmm TLB flush. */
void arch_exit_mmap(struct mm_struct *mm)
{
	destroy_pagetable_cache(mm);

	if (radix_enabled()) {
		/*
		 * Radix doesn't have a valid bit in the process table
		 * entries. However we know that at least P9 implementation
		 * will avoid caching an entry with an invalid RTS field,
		 * and 0 is invalid. So this will do.
		 *
		 * This runs before the "fullmm" tlb flush in exit_mmap,
		 * which does a RIC=2 tlbie to clear the process table
		 * entry. See the "fullmm" comments in tlb-radix.c.
		 *
		 * No barrier required here after the store because
		 * this process will do the invalidate, which starts with
		 * ptesync.
		 */
		process_tb[mm->context.id].prtb0 = 0;
	}
}

#ifdef CONFIG_PPC_RADIX_MMU
/* Switch the hardware PID register to the next mm's context id. */
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
	int cpu = smp_processor_id();

	clear_tasks_mm_cpumask(cpu);
	tlbiel_all();
}
#endif