// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as 8xx, 4xx, BookE, etc.
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 *  Derived from previous arch/powerpc/mm/mmu_context.c
 *  and arch/powerpc/include/asm/mmu_context.h
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */

//#define DEBUG_MAP_CONSISTENCY
//#define DEBUG_CLAMP_LAST_CONTEXT   31
//#define DEBUG_HARDER

/* We don't use DEBUG because it tends to be compiled in always nowadays
 * and this would generate way too much output
 */
#ifdef DEBUG_HARDER
#define pr_hard(args...)	printk(KERN_DEBUG args)
#define pr_hardcont(args...)	printk(KERN_CONT args)
#else
#define pr_hard(args...)	do { } while (0)
#define pr_hardcont(args...)	do { } while (0)
#endif

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

#include <mm/mmu_decl.h>

/*
 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
 * A better way would be to keep track of tasks that own contexts, and implement
 * an LRU usage. That way very active tasks don't always have to pay the TLB
 * reload overhead. The kernel pages are mapped shared, so the kernel can run on
 * behalf of any task that makes a kernel entry. Shared does not mean they are
 * not protected, just that the ASID comparison is not performed. -- Dan
 *
 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
 * is disabled, so we can use a TID of zero to represent all kernel pages as
 * shared among all contexts. -- Dan
 *
 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
 * normally never have to steal, though the facility is present if needed.
 * -- BenH
 */
#define FIRST_CONTEXT 1
#ifdef DEBUG_CLAMP_LAST_CONTEXT
#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
#elif defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
#else
#define LAST_CONTEXT 255
#endif

static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
#ifdef CONFIG_SMP
static unsigned long *stale_map[NR_CPUS];
#endif
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);

#define CTX_MAP_SIZE	\
	(sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
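/*
 * For example, with LAST_CONTEXT == 255 and 64-bit longs this works out
 * to (255 / 64 + 1) = 4 longs, i.e. 32 bytes, one bit per id 0..255.
 */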


/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :).  This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU
 *  -- benh
 */
#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
	struct mm_struct *mm;
	unsigned int cpu, max, i;

	max = LAST_CONTEXT - FIRST_CONTEXT;

	/* Attempt to free next_context first and then loop until we manage */
	while (max--) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		/* We have a candidate victim, check if it's active, on SMP
		 * we cannot steal active contexts
		 */
		if (mm->context.active) {
			id++;
			if (id > LAST_CONTEXT)
				id = FIRST_CONTEXT;
			continue;
		}
		pr_hardcont(" | steal %d from 0x%p", id, mm);

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;

		/* Mark it stale on all CPUs that used this mm. For threaded
		 * implementations, we set it on all threads on each core
		 * represented in the mask. A future implementation will use
		 * a core map instead but this will do for now.
		 */
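		/*
		 * Note: the inner loop below leaves 'i' one past the last
		 * thread of the victim's core, so the 'cpu = i - 1' update
		 * makes for_each_cpu() resume at the next core instead of
		 * revisiting each sibling thread individually.
		 */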
		for_each_cpu(cpu, mm_cpumask(mm)) {
			for (i = cpu_first_thread_sibling(cpu);
			     i <= cpu_last_thread_sibling(cpu); i++) {
				if (stale_map[i])
					__set_bit(id, stale_map[i]);
			}
			cpu = i - 1;
		}
		return id;
	}

	/* This will happen if you have more CPUs than available contexts,
	 * all we can do here is wait a bit and try again
	 */
	raw_spin_unlock(&context_lock);
	cpu_relax();
	raw_spin_lock(&context_lock);

	/* This will cause the caller to try again */
	return MMU_NO_CONTEXT;
}
#endif  /* CONFIG_SMP */

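/*
 * Steal every context at once. Only used on 8xx (see the call site in
 * switch_mmu_context()): with so few contexts it is presumably cheaper
 * to invalidate the whole TLB once and recycle all ids than to flush
 * victims one at a time. Not for use on SMP, since other CPUs may
 * still be running with the ids being recycled.
 */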
static unsigned int steal_all_contexts(void)
{
	struct mm_struct *mm;
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
#endif
	unsigned int id;

	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		pr_hardcont(" | steal %d from 0x%p", id, mm);

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;
		if (id != FIRST_CONTEXT) {
			context_mm[id] = NULL;
			__clear_bit(id, context_map);
#ifdef DEBUG_MAP_CONSISTENCY
			mm->context.active = 0;
#endif
		}
#ifdef CONFIG_SMP
		__clear_bit(id, stale_map[cpu]);
#endif
	}

	/* Flush the TLB for all contexts (not to be used on SMP) */
	_tlbil_all();

	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;

	return FIRST_CONTEXT;
}
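
/*
 * Note that FIRST_CONTEXT is deliberately left marked in use above: it is
 * handed straight back to the caller, which is why nr_free_contexts is set
 * to LAST_CONTEXT - FIRST_CONTEXT rather than the full context count.
 */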

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 */
static unsigned int steal_context_up(unsigned int id)
{
	struct mm_struct *mm;
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
#endif

	/* Pick up the victim mm */
	mm = context_mm[id];

	pr_hardcont(" | steal %d from 0x%p", id, mm);

	/* Flush the TLB for that context */
	local_flush_tlb_mm(mm);

	/* Mark this mm as having no context anymore */
	mm->context.id = MMU_NO_CONTEXT;

	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
#ifdef CONFIG_SMP
	__clear_bit(id, stale_map[cpu]);
#endif

	return id;
}

#ifdef DEBUG_MAP_CONSISTENCY
static void context_check_map(void)
{
	unsigned int id, nrf, nact;

	nrf = nact = 0;
	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
		int used = test_bit(id, context_map);

		if (!used)
			nrf++;
		if (used != (context_mm[id] != NULL))
			pr_err("MMU: Context %d is %s and MM is %p !\n",
			       id, used ? "used" : "free", context_mm[id]);
		if (context_mm[id] != NULL)
			nact += context_mm[id]->context.active;
	}
	if (nrf != nr_free_contexts) {
		pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
		       nr_free_contexts, nrf);
		nr_free_contexts = nrf;
	}
	if (nact > num_online_cpus())
		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
		       nact, num_online_cpus());
	if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
		pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
#endif

void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	unsigned int id;
#ifdef CONFIG_SMP
	unsigned int i, cpu = smp_processor_id();
#endif
	unsigned long *map;

	/* No lockless fast path .. yet */
	raw_spin_lock(&context_lock);

	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
		cpu, next, next->context.active, next->context.id);

#ifdef CONFIG_SMP
	/* Mark us active and the previous one not anymore */
	next->context.active++;
	if (prev) {
		pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
		WARN_ON(prev->context.active < 1);
		prev->context.active--;
	}

 again:
#endif /* CONFIG_SMP */

	/* If we already have a valid assigned context, skip all that */
	id = next->context.id;
	if (likely(id != MMU_NO_CONTEXT)) {
#ifdef DEBUG_MAP_CONSISTENCY
		if (context_mm[id] != next)
			pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
			       next, id, id, context_mm[id]);
#endif
		goto ctxt_ok;
	}

	/* We really don't have a context, let's try to acquire one */
	id = next_context;
	if (id > LAST_CONTEXT)
		id = FIRST_CONTEXT;
	map = context_map;

	/* No more free contexts, let's try to steal one */
	if (nr_free_contexts == 0) {
#ifdef CONFIG_SMP
		if (num_online_cpus() > 1) {
			id = steal_context_smp(id);
			if (id == MMU_NO_CONTEXT)
				goto again;
			goto stolen;
		}
#endif /* CONFIG_SMP */
		if (IS_ENABLED(CONFIG_PPC_8xx))
			id = steal_all_contexts();
		else
			id = steal_context_up(id);
		goto stolen;
	}
	nr_free_contexts--;

	/* We know there's at least one free context, try to find it */
	while (__test_and_set_bit(id, map)) {
		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
		if (id > LAST_CONTEXT)
			id = FIRST_CONTEXT;
	}
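	/*
	 * The loop above is a circular scan of the context bitmap: when the
	 * test-and-set finds the slot already taken, find_next_zero_bit()
	 * skips ahead, wrapping back to FIRST_CONTEXT past the end. Since
	 * nr_free_contexts was non-zero (and we hold context_lock), a free
	 * bit is guaranteed to exist and the scan terminates.
	 */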
 stolen:
	next_context = id + 1;
	context_mm[id] = next;
	next->context.id = id;
	pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);

	context_check_map();
 ctxt_ok:

	/* If that context got marked stale on this CPU, then flush the
	 * local TLB for it and unmark it before we use it
	 */
#ifdef CONFIG_SMP
	if (test_bit(id, stale_map[cpu])) {
		pr_hardcont(" | stale flush %d [%d..%d]",
			    id, cpu_first_thread_sibling(cpu),
			    cpu_last_thread_sibling(cpu));

		local_flush_tlb_mm(next);

		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
		for (i = cpu_first_thread_sibling(cpu);
		     i <= cpu_last_thread_sibling(cpu); i++) {
			if (stale_map[i])
				__clear_bit(id, stale_map[i]);
		}
	}
#endif

	/* Flick the MMU and release lock */
	pr_hardcont(" -> %d\n", id);
	set_context(id, next->pgd);
	raw_spin_unlock(&context_lock);
}
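
/*
 * Rough call-path sketch for orientation (the exact caller lives in
 * asm/mmu_context.h and may vary between kernel versions):
 *
 *	context_switch()
 *	  -> switch_mm() / switch_mm_irqs_off()
 *	       -> switch_mmu_context(prev, next, tsk)	(this file)
 *	            -> set_context(id, next->pgd)	(low-level MMU hook)
 */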

/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
	pr_hard("initing context for mm @%p\n", mm);

	/*
	 * We have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 */
	if (mm->context.id == 0)
		slice_init_new_context_exec(mm);
	mm->context.id = MMU_NO_CONTEXT;
	mm->context.active = 0;
	pte_frag_set(&mm->context, NULL);
	return 0;
}
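
/*
 * Note that no context id is allocated here: the id stays MMU_NO_CONTEXT
 * until the mm is first scheduled in and switch_mmu_context() assigns one
 * lazily, so address spaces that never run don't consume entries in the
 * small context id space.
 */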

/*
 * We're finished using the context for an address space.
 */
void destroy_context(struct mm_struct *mm)
{
	unsigned long flags;
	unsigned int id;

	if (mm->context.id == MMU_NO_CONTEXT)
		return;

	WARN_ON(mm->context.active != 0);

	raw_spin_lock_irqsave(&context_lock, flags);
	id = mm->context.id;
	if (id != MMU_NO_CONTEXT) {
		__clear_bit(id, context_map);
		mm->context.id = MMU_NO_CONTEXT;
#ifdef DEBUG_MAP_CONSISTENCY
		mm->context.active = 0;
#endif
		context_mm[id] = NULL;
		nr_free_contexts++;
	}
	raw_spin_unlock_irqrestore(&context_lock, flags);
}

#ifdef CONFIG_SMP
static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
	/* We don't touch the boot CPU's map, it's allocated at boot and
	 * kept around forever
	 */
	if (cpu == boot_cpuid)
		return 0;

	pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
	stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
	return 0;
}

static int mmu_ctx_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	if (cpu == boot_cpuid)
		return 0;

	pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
	kfree(stale_map[cpu]);
	stale_map[cpu] = NULL;

	/* We also clear the cpu_vm_mask bits of CPUs going away */
	clear_tasks_mm_cpumask(cpu);
#endif
	return 0;
}

#endif /* CONFIG_SMP */

/*
 * Initialize the context management stuff.
 */
void __init mmu_context_init(void)
{
	/* Mark init_mm as being active on all possible CPUs since
	 * we'll get called with prev == init_mm the first time
	 * we schedule on a given CPU
	 */
	init_mm.context.active = NR_CPUS;

	/*
	 * Allocate the maps used by context management
	 */
	context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
	if (!context_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      CTX_MAP_SIZE);
	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
				    SMP_CACHE_BYTES);
	if (!context_mm)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      sizeof(void *) * (LAST_CONTEXT + 1));
#ifdef CONFIG_SMP
	stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
	if (!stale_map[boot_cpuid])
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      CTX_MAP_SIZE);

	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
				  "powerpc/mmu/ctx:prepare",
				  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif

	printk(KERN_INFO
	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
	       2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
	       LAST_CONTEXT - FIRST_CONTEXT + 1);

	/*
	 * Some processors have too few contexts to reserve one for
	 * init_mm, and require using context 0 for a normal task.
	 * Other processors reserve the use of context zero for the kernel.
	 * This code assumes FIRST_CONTEXT < 32.
	 */
	context_map[0] = (1 << FIRST_CONTEXT) - 1;
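	/*
	 * E.g. with FIRST_CONTEXT == 1 the line above sets context_map[0]
	 * to 0x1, permanently marking id 0 in use; the comment at the top
	 * of the file describes TID 0 as the shared kernel context on 4xx.
	 */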
	next_context = FIRST_CONTEXT;
	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}