// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  MMU context allocation for 64-bit kernels.
 *
 *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

static DEFINE_IDA(mmu_context_ida);

static int alloc_context_id(int min_id, int max_id)
{
        return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

#ifdef CONFIG_PPC_64S_HASH_MMU
void __init hash__reserve_context_id(int id)
{
        int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

        WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

int hash__alloc_context_id(void)
{
        unsigned long max;

        if (mmu_has_feature(MMU_FTR_68_BIT_VA))
                max = MAX_USER_CONTEXT;
        else
                max = MAX_USER_CONTEXT_65BIT_VA;

        return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
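
/*
 * Context ids on hash: with the 68-bit VA layout, a task's address
 * space is carved into 512TB ranges and each range in use gets its own
 * context id. ctx->id aliases ctx->extended_id[0] (they share a union
 * in mm_context_t); the higher slots are only populated once the
 * corresponding range is actually touched. Illustrative layout:
 *
 *      extended_id[0]: EA 0 .. 512TB     (== ctx->id, always allocated)
 *      extended_id[1]: EA 512TB .. 1PB   (allocated on first use)
 *      ...
 */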
static int realloc_context_ids(mm_context_t *ctx)
{
        int i, id;

        /*
         * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
         * there wasn't one allocated previously (which happens in the exec
         * case where ctx is newly allocated).
         *
         * We have to be a bit careful here. We must keep the existing ids in
         * the array, so that we can test if they're non-zero to decide if we
         * need to allocate a new one. However in case of error we must free the
         * ids we've allocated but *not* any of the existing ones (or risk a
         * UAF). That's why we decrement i at the start of the error handling
         * loop, to skip the id that we just tested but couldn't reallocate.
         */
        for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
                if (i == 0 || ctx->extended_id[i]) {
                        id = hash__alloc_context_id();
                        if (id < 0)
                                goto error;

                        ctx->extended_id[i] = id;
                }
        }

        /* The caller expects us to return id */
        return ctx->id;

error:
        for (i--; i >= 0; i--) {
                if (ctx->extended_id[i])
                        ida_free(&mmu_context_ida, ctx->extended_id[i]);
        }

        return id;
}
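
/*
 * Call-path note (core-kernel behaviour, not defined in this file):
 * hash__init_new_context() is reached through init_new_context() below,
 * which mm_init() in kernel/fork.c invokes on both fork and exec. An
 * exec'd mm arrives here with context.id == 0, a forked mm with the
 * parent's id still set, which is what the id check inside keys off.
 */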
static int hash__init_new_context(struct mm_struct *mm)
{
        int index;

        mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
                                           GFP_KERNEL);
        if (!mm->context.hash_context)
                return -ENOMEM;

        /*
         * The old code would re-promote on fork, we don't do that when using
         * slices as it could cause problems promoting slices that have been
         * forced down to 4K.
         *
         * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
         * initialize context slice details for newly allocated mm's (which will
         * have id == 0) and don't alter context slices inherited via fork (which
         * will have id != 0).
         *
         * We should not be calling init_new_context() on init_mm. Hence a
         * check against 0 is OK.
         */
        if (mm->context.id == 0) {
                memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
                slice_init_new_context_exec(mm);
        } else {
                /* This is fork. Copy hash_context details from current->mm */
                memcpy(mm->context.hash_context, current->mm->context.hash_context,
                       sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
                /* Inherit subpage prot details if we have one. */
                if (current->mm->context.hash_context->spt) {
                        mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
                                                                GFP_KERNEL);
                        if (!mm->context.hash_context->spt) {
                                kfree(mm->context.hash_context);
                                return -ENOMEM;
                        }
                }
#endif
        }

        index = realloc_context_ids(&mm->context);
        if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
                kfree(mm->context.hash_context->spt);
#endif
                kfree(mm->context.hash_context);
                return index;
        }

        pkey_mm_init(mm);
        return index;
}

void hash__setup_new_exec(void)
{
        slice_setup_new_exec();

        slb_setup_new_exec();
}
#else
static inline int hash__init_new_context(struct mm_struct *mm)
{
        BUILD_BUG();
        return 0;
}
#endif
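
/*
 * On radix the context id doubles as the hardware PID. Allocating one
 * also means publishing a process table entry: prtb0 below packs the
 * radix tree size (RTS), the physical address of the PGD and the root
 * index size, which is everything the MMU needs to start walking this
 * mm's page tables.
 */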
static int radix__init_new_context(struct mm_struct *mm)
{
        unsigned long rts_field;
        int index, max_id;

        max_id = (1 << mmu_pid_bits) - 1;
        index = alloc_context_id(mmu_base_pid, max_id);
        if (index < 0)
                return index;

        /*
         * Set the process table entry.
         */
        rts_field = radix__get_tree_size();
        process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

        /*
         * Order the above store with the subsequent update of the PID
         * register (at which point HW can start loading/caching
         * the entry) and the corresponding load by the MMU from
         * the L2 cache.
         */
        asm volatile("ptesync;isync" : : : "memory");

#ifdef CONFIG_PPC_64S_HASH_MMU
        mm->context.hash_context = NULL;
#endif

        return index;
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int index;

        if (radix_enabled())
                index = radix__init_new_context(mm);
        else
                index = hash__init_new_context(mm);

        if (index < 0)
                return index;

        mm->context.id = index;

        mm->context.pte_frag = NULL;
        mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
        mm_iommu_init(mm);
#endif
        atomic_set(&mm->context.active_cpus, 0);
        atomic_set(&mm->context.copros, 0);

        return 0;
}

void __destroy_context(int context_id)
{
        ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
        if (radix_enabled()) {
                ida_free(&mmu_context_ida, ctx->id);
        } else {
#ifdef CONFIG_PPC_64S_HASH_MMU
                int index, context_id;

                for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
                        context_id = ctx->extended_id[index];
                        if (context_id)
                                ida_free(&mmu_context_ida, context_id);
                }
                kfree(ctx->hash_context);
#else
                BUILD_BUG(); // radix_enabled() should be constant true
#endif
        }
}
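
/*
 * Teardown of the page-table fragment caches. A PMD page is handed out
 * in PMD_FRAG_NR fragments, with pt_frag_refcount tracking the live
 * ones. The offset of the saved frag pointer within its page encodes
 * how many fragments were already carved off, so subtracting the
 * remainder below drops the references held for fragments that were
 * never handed out.
 */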
static void pmd_frag_destroy(void *pmd_frag)
{
        int count;
        struct ptdesc *ptdesc;

        ptdesc = virt_to_ptdesc(pmd_frag);
        /* drop all the pending references */
        count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
        /* We allow PMD_FRAG_NR fragments from a PMD page */
        if (atomic_sub_and_test(PMD_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
                pagetable_pmd_dtor(ptdesc);
                pagetable_free(ptdesc);
        }
}

static void destroy_pagetable_cache(struct mm_struct *mm)
{
        void *frag;

        frag = mm->context.pte_frag;
        if (frag)
                pte_frag_destroy(frag);

        frag = mm->context.pmd_frag;
        if (frag)
                pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
        WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
        /*
         * For tasks which were successfully initialized we end up calling
         * arch_exit_mmap(), which clears the process table entry. And
         * arch_exit_mmap() is called before the required fullmm TLB flush,
         * which does a RIC=2 flush. Hence for an initialized task, we do clear
         * any cached process table entries.
         *
         * The condition below handles the error case during task init. We have
         * set the process table entry early and if we fail a task
         * initialization, we need to ensure the process table entry is zeroed.
         * We need not worry about process table entry caches because the task
         * never ran with the PID value.
         */
        if (radix_enabled())
                process_tb[mm->context.id].prtb0 = 0;
        else
                subpage_prot_free(mm);
        destroy_contexts(&mm->context);
        mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
        destroy_pagetable_cache(mm);

        if (radix_enabled()) {
                /*
                 * Radix doesn't have a valid bit in the process table
                 * entries. However we know that at least the P9
                 * implementation will avoid caching an entry with an
                 * invalid RTS field, and 0 is invalid. So this will do.
                 *
                 * This runs before the "fullmm" tlb flush in exit_mmap,
                 * which does a RIC=2 tlbie to clear the process table
                 * entry. See the "fullmm" comments in tlb-radix.c.
                 *
                 * No barrier required here after the store because
                 * this process will do the invalidate, which starts with
                 * ptesync.
                 */
                process_tb[mm->context.id].prtb0 = 0;
        }
}
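
/*
 * Context switch on radix is just a PID SPR update: the MMU finds the
 * matching process table entry by PID from there on. The isync ensures
 * that instructions after the switch execute with the new translations.
 */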
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
        mtspr(SPRN_PID, next->context.id);
        isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
        int cpu = smp_processor_id();

        clear_tasks_mm_cpumask(cpu);
        tlbiel_all();
}
#endif