// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  MMU context allocation for 64-bit kernels.
 *
 *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

static DEFINE_IDA(mmu_context_ida);

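/*
 * Allocate a context ID in [min_id, max_id] from the shared IDA. Both hash
 * context IDs and radix PIDs are handed out by this allocator.
 */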
static int alloc_context_id(int min_id, int max_id)
{
	return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

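/*
 * Reserve a specific context ID for the caller. The ID is expected to be
 * free, so warn if the allocator hands back anything else.
 */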
void hash__reserve_context_id(int id)
{
	int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

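/*
 * Allocate a fresh user context ID for the hash MMU. The upper bound depends
 * on whether the CPU supports 68-bit virtual addresses.
 */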
int hash__alloc_context_id(void)
{
	unsigned long max;

	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
		max = MAX_USER_CONTEXT;
	else
		max = MAX_USER_CONTEXT_65BIT_VA;

	return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

static int realloc_context_ids(mm_context_t *ctx)
{
	int i, id;

	/*
	 * id 0 (aka. ctx->id) is special: we always allocate a new one, even if
	 * there wasn't one allocated previously (which happens in the exec
	 * case where ctx is newly allocated).
	 *
	 * We have to be a bit careful here. We must keep the existing ids in
	 * the array, so that we can test if they're non-zero to decide if we
	 * need to allocate a new one. However, in case of error we must free
	 * the ids we've allocated but *not* any of the existing ones (or risk a
	 * UAF). That's why we decrement i at the start of the error handling
	 * loop, to skip the id that we just tested but couldn't reallocate.
	 */
	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
		if (i == 0 || ctx->extended_id[i]) {
			id = hash__alloc_context_id();
			if (id < 0)
				goto error;

			ctx->extended_id[i] = id;
		}
	}

	/* The caller expects us to return id */
	return ctx->id;

error:
	for (i--; i >= 0; i--) {
		if (ctx->extended_id[i])
			ida_free(&mmu_context_ida, ctx->extended_id[i]);
	}

	return id;
}

static int hash__init_new_context(struct mm_struct *mm)
{
	int index;

	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
					   GFP_KERNEL);
	if (!mm->context.hash_context)
		return -ENOMEM;

	/*
	 * The old code would re-promote on fork; we don't do that when using
	 * slices, as it could cause problems promoting slices that have been
	 * forced down to 4K.
	 *
	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 *
	 * We should not be calling init_new_context() on init_mm. Hence a
	 * check against 0 is OK.
	 */
	if (mm->context.id == 0) {
		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
		slice_init_new_context_exec(mm);
	} else {
		/* This is fork. Copy hash_context details from current->mm */
		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
		/* inherit subpage prot details if we have them. */
		if (current->mm->context.hash_context->spt) {
			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
								GFP_KERNEL);
			if (!mm->context.hash_context->spt) {
				kfree(mm->context.hash_context);
				return -ENOMEM;
			}
		}
#endif
	}

	index = realloc_context_ids(&mm->context);
	if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
		kfree(mm->context.hash_context->spt);
#endif
		kfree(mm->context.hash_context);
		return index;
	}

	pkey_mm_init(mm);
	return index;
}

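/* Reset slice and SLB state for a task that has just exec'd. */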
void hash__setup_new_exec(void)
{
	slice_setup_new_exec();

	slb_setup_new_exec();
}

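/*
 * Allocate a PID for the new mm and point its process table entry at the
 * PGD, so the MMU can translate this address space once the PID is loaded.
 */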
static int radix__init_new_context(struct mm_struct *mm)
{
	unsigned long rts_field;
	int index, max_id;

	max_id = (1 << mmu_pid_bits) - 1;
	index = alloc_context_id(mmu_base_pid, max_id);
	if (index < 0)
		return index;

	/* Set the process table entry. */
	rts_field = radix__get_tree_size();
	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * Order the above store with subsequent update of the PID
	 * register (at which point HW can start loading/caching
	 * the entry) and the corresponding load by the MMU from
	 * the L2 cache.
	 */
	asm volatile("ptesync;isync" : : : "memory");

	mm->context.hash_context = NULL;

	return index;
}

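/*
 * Arch hook called by the core mm code when a new mm is created. Dispatch to
 * the radix or hash specific initialisation, then set up the parts of the
 * context that are common to both.
 */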
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int index;

	if (radix_enabled())
		index = radix__init_new_context(mm);
	else
		index = hash__init_new_context(mm);

	if (index < 0)
		return index;

	mm->context.id = index;

	mm->context.pte_frag = NULL;
	mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
	mm_iommu_init(mm);
#endif
	atomic_set(&mm->context.active_cpus, 0);
	atomic_set(&mm->context.copros, 0);

	return 0;
}

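/* Return a single context ID to the allocator. */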
void __destroy_context(int context_id)
{
	ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

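/*
 * Free every context ID recorded in the extended_id array (slot 0 aliases
 * ctx->id), along with the hash context if one was allocated.
 */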
static void destroy_contexts(mm_context_t *ctx)
{
	int index, context_id;

	for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
		context_id = ctx->extended_id[index];
		if (context_id)
			ida_free(&mmu_context_ida, context_id);
	}
	kfree(ctx->hash_context);
}

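/*
 * Drop the references held for the fragments of the cached PMD page that
 * were never handed out, and free the page if no other fragments are still
 * in use.
 */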
static void pmd_frag_destroy(void *pmd_frag)
{
	int count;
	struct page *page;

	page = virt_to_page(pmd_frag);
	/* drop all the pending references */
	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
	/* We allow PMD_FRAG_NR fragments from a PMD page */
	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

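/* Release any PTE/PMD fragment pages still cached in the mm context. */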
static void destroy_pagetable_cache(struct mm_struct *mm)
{
	void *frag;

	frag = mm->context.pte_frag;
	if (frag)
		pte_frag_destroy(frag);

	frag = mm->context.pmd_frag;
	if (frag)
		pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
	/*
	 * For tasks which were successfully initialized we end up calling
	 * arch_exit_mmap(), which clears the process table entry. And
	 * arch_exit_mmap() is called before the required fullmm TLB flush
	 * which does a RIC=2 flush. Hence for an initialized task, any cached
	 * process table entries have already been invalidated by the time we
	 * get here.
	 *
	 * The condition below handles the error case during task init. We have
	 * set the process table entry early and if we fail a task
	 * initialization, we need to ensure the process table entry is zeroed.
	 * We need not worry about process table entry caches because the task
	 * never ran with the PID value.
	 */
	if (radix_enabled())
		process_tb[mm->context.id].prtb0 = 0;
	else
		subpage_prot_free(mm);
	destroy_contexts(&mm->context);
	mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
	destroy_pagetable_cache(mm);

	if (radix_enabled()) {
		/*
		 * Radix doesn't have a valid bit in the process table
		 * entries. However we know that at least the P9 implementation
		 * will avoid caching an entry with an invalid RTS field,
		 * and 0 is invalid. So this will do.
		 *
		 * This runs before the "fullmm" tlb flush in exit_mmap,
		 * which does a RIC=2 tlbie to clear the process table
		 * entry. See the "fullmm" comments in tlb-radix.c.
		 *
		 * No barrier required here after the store because
		 * this process will do the invalidate, which starts with
		 * ptesync.
		 */
		process_tb[mm->context.id].prtb0 = 0;
	}
}

#ifdef CONFIG_PPC_RADIX_MMU
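/*
 * Switch the hardware to the next mm's address space by loading its PID;
 * the isync ensures the new PID is in effect before any following
 * translations.
 */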
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
	int cpu = smp_processor_id();

	clear_tasks_mm_cpumask(cpu);
	tlbiel_all();
}
#endif