/* xref: /kernel/linux/linux-5.10/arch/x86/hyperv/mmu.c (revision 8c2ecf20) */
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
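/*
 * Encoding example (illustrative, assuming 4 KiB pages): flushing the
 * three-page range [0x10000, 0x13000) is encoded as a single entry
 * 0x10000 | ((0x3000 - 1) >> PAGE_SHIFT) == 0x10002, i.e. the base page
 * plus two additional pages. A range of HV_TLB_FLUSH_UNIT or more sets
 * all twelve low bits (| ~PAGE_MASK), covering 4096 pages in one entry.
 */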

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info);

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT) {
			gva_list[gva_n] |= ~PAGE_MASK;
			cur += HV_TLB_FLUSH_UNIT;
		} else if (diff) {
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
			cur = end;
		}

		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}
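
/*
 * Worked example (hypothetical call, assuming 4 KiB pages):
 *
 *	u64 list[1];
 *	int n = fill_gva_list(list, 0, 0x10000, 0x14000);
 *
 * leaves n == 1 and list[0] == 0x10003: the base page 0x10000 plus three
 * additional pages. Ranges longer than HV_TLB_FLUSH_UNIT (16 MiB here)
 * produce one entry per HV_TLB_FLUSH_UNIT chunk.
 */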

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush **flush_pcpu;
	struct hv_tlb_flush *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	local_irq_save(flags);

	/*
	 * Only check the mask _after_ interrupts have been disabled to avoid
	 * the mask changing under our feet.
	 */
	if (cpumask_empty(cpus)) {
		local_irq_restore(flags);
		return;
	}

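	/*
	 * hyperv_pcpu_input_arg is the pre-allocated per-CPU hypercall
	 * input page; reusing it avoids allocating memory inside this
	 * IRQs-off region. It can still be NULL (e.g. if the per-CPU setup
	 * failed), which the !flush check below handles.
	 */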
	flush_pcpu = (struct hv_tlb_flush **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can get
		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers are usually in ascending order
		 * and match Linux CPU ids, here is an optimization: we check
		 * the VP number of the highest CPU in the supplied set first,
		 * so we can quickly find out whether using the *_EX hypercalls
		 * is a must. We still check all VP numbers when walking the
		 * supplied CPU set to remain correct in all cases.
		 */
		if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush no more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
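	/*
	 * Arithmetic sketch (assuming 4 KiB pages): struct hv_tlb_flush has
	 * a 24-byte fixed part (address_space, flags, processor_mask), so
	 * max_gvas works out to (4096 - 24) / 8 = 509 entries.
	 */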
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
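		/*
		 * For this rep hypercall, gva_n is the repetition count and
		 * the variable header size is 0; hv_do_rep_hypercall()
		 * internally continues from the last completed rep until all
		 * gva_n entries have been processed.
		 */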
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);

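	/*
	 * HV_HYPERCALL_RESULT_MASK extracts the low 16-bit status code;
	 * zero is HV_STATUS_SUCCESS. On any failure we fall through to the
	 * native, IPI-based flush.
	 */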
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex **flush_pcpu;
	struct hv_tlb_flush_ex *flush;
	u64 status;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return U64_MAX;

	flush_pcpu = (struct hv_tlb_flush_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

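	/*
	 * HV_GENERIC_SET_SPARSE_4K packs the target VPs into sparse 64-bit
	 * banks; valid_bank_mask marks which banks are present.
	 * cpumask_to_vpset() returns the number of banks written, or a
	 * negative value if some CPU has no valid VP number.
	 */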
	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	if (nr_bank < 0)
		return U64_MAX;

	/*
	 * We can flush no more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
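	/*
	 * Arithmetic sketch (assuming 4 KiB pages): the fixed part of
	 * struct hv_tlb_flush_ex plus the vpset header is 32 bytes, so with
	 * a single bank max_gvas works out to (4096 - 32 - 8) / 8 = 507.
	 */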

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank, flush, NULL);
	}

	return status;
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
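
/*
 * Design note (based on the generic paravirt TLB-flush reasoning, not
 * stated in this file): the hypercall path does not send flush IPIs, so
 * page-table pages must be freed via tlb_remove_table() to keep lockless
 * walkers such as GUP-fast from dereferencing a freed table; hence the
 * two pv_ops are switched together above.
 */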