// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

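/*
 * Values for the RIC (Radix Invalidation Control) field of tlbie(l):
 * invalidate TLB entries only, the Page Walk Cache only, or everything
 * including the cached process/partition table entries.
 */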
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

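/*
 * Flush all entries from the local CPU's TLB, one congruence class (set)
 * at a time, using the ISA v3.00 tlbiel form.
 */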
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

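/*
 * The __tlbiel_* helpers below invalidate translations on the local CPU
 * only, while the __tlbie_* variants broadcast the invalidation to all
 * CPUs (and the nest MMU) over the interconnect.
 */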
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

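/*
 * The fixup_tlbie_* helpers work around POWER9 tlbie errata by issuing an
 * extra invalidation after the real one: a flush with PID/LPID 0 for the
 * ERAT bug, and a repeat of the final tlbie for the store queue bug.
 */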
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in HPT mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:
		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

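/*
 * When broadcast tlbie is not used, remote CPUs are flushed by sending an
 * IPI that runs tlbiel locally on every CPU in the mm's cpumask.
 */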
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * We always want the CPU translations to be invalidated with tlbiel
	 * in these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
				    t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the TLB entries for the specified mm context
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

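/*
 * The mm is considered single threaded if its only user is the current
 * task and no coprocessors are attached, in which case a local flush is
 * enough once any lazy-tlb CPUs have been kicked off the mm.
 */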
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 for a PID/LPID
	 * invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_is_singlethreaded, and be in the process of
	 * kthread_use_mm when interrupted here. In that case, current->mm will
	 * be set to mm, because kthread_use_mm() setting ->mm and switching to
	 * the mm is done with interrupts off.
	 */
	if (current->mm == mm)
		goto out_flush;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	atomic_dec(&mm->context.active_cpus);
	cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));

out_flush:
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

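/*
 * IPI handler for radix__flush_tlb_kernel_range(): flush everything cached
 * under PID 0 on the local CPU.
 */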
static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so we always have to issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-PID flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

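/*
 * Flush a user virtual address range, either page by page or by escalating
 * to a full-PID flush once the number of pages exceeds the ceilings above.
 * 2M (THP) mappings within the range are flushed separately at PMD size.
 */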
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
				       start, end);
	} else if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush;
		unsigned long hstart, hend;

		hstart = (start + PMD_SIZE - 1) & PMD_MASK;
		hend = end & PMD_MASK;
		hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

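/* Convert a page size in bytes to the corresponding MMU_PAGE_* index, or -1. */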
static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize);

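/*
 * Called at the end of an mmu_gather batch: when page tables were freed
 * during the batch, the page walk cache must be flushed as well, so the
 * RIC_FLUSH_ALL / PWC variants are used instead of plain TLB flushes.
 */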
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
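/*
 * A THP collapse replaces a range of small PTEs with a single PMD entry,
 * so both the old TLB entries and the page walk cache must be invalidated.
 */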
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

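/*
 * Flush every translation, process and partition scoped, for all PIDs and
 * LPIDs (IS = 3), on all CPUs.
 */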
void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * First flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Then flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */