162306a36Sopenharmony_ci#define pr_fmt(fmt)  "Hyper-V: " fmt
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/hyperv.h>
462306a36Sopenharmony_ci#include <linux/log2.h>
562306a36Sopenharmony_ci#include <linux/slab.h>
662306a36Sopenharmony_ci#include <linux/types.h>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <asm/fpu/api.h>
962306a36Sopenharmony_ci#include <asm/mshyperv.h>
1062306a36Sopenharmony_ci#include <asm/msr.h>
1162306a36Sopenharmony_ci#include <asm/tlbflush.h>
1262306a36Sopenharmony_ci#include <asm/tlb.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#define CREATE_TRACE_POINTS
1562306a36Sopenharmony_ci#include <asm/trace/hyperv.h>
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci/* Each gva in gva_list encodes up to 4096 pages to flush */
1862306a36Sopenharmony_ci#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
2162306a36Sopenharmony_ci				      const struct flush_tlb_info *info);
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci/*
2462306a36Sopenharmony_ci * Fills in gva_list starting from offset. Returns the number of items added.
2562306a36Sopenharmony_ci */
2662306a36Sopenharmony_cistatic inline int fill_gva_list(u64 gva_list[], int offset,
2762306a36Sopenharmony_ci				unsigned long start, unsigned long end)
2862306a36Sopenharmony_ci{
2962306a36Sopenharmony_ci	int gva_n = offset;
3062306a36Sopenharmony_ci	unsigned long cur = start, diff;
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci	do {
3362306a36Sopenharmony_ci		diff = end > cur ? end - cur : 0;
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci		gva_list[gva_n] = cur & PAGE_MASK;
3662306a36Sopenharmony_ci		/*
3762306a36Sopenharmony_ci		 * Lower 12 bits encode the number of additional
3862306a36Sopenharmony_ci		 * pages to flush (in addition to the 'cur' page).
3962306a36Sopenharmony_ci		 */
4062306a36Sopenharmony_ci		if (diff >= HV_TLB_FLUSH_UNIT) {
4162306a36Sopenharmony_ci			gva_list[gva_n] |= ~PAGE_MASK;
4262306a36Sopenharmony_ci			cur += HV_TLB_FLUSH_UNIT;
4362306a36Sopenharmony_ci		}  else if (diff) {
4462306a36Sopenharmony_ci			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
4562306a36Sopenharmony_ci			cur = end;
4662306a36Sopenharmony_ci		}
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci		gva_n++;
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	} while (cur < end);
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci	return gva_n - offset;
5362306a36Sopenharmony_ci}
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic bool cpu_is_lazy(int cpu)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
5862306a36Sopenharmony_ci}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_cistatic void hyperv_flush_tlb_multi(const struct cpumask *cpus,
6162306a36Sopenharmony_ci				   const struct flush_tlb_info *info)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	int cpu, vcpu, gva_n, max_gvas;
6462306a36Sopenharmony_ci	struct hv_tlb_flush *flush;
6562306a36Sopenharmony_ci	u64 status;
6662306a36Sopenharmony_ci	unsigned long flags;
6762306a36Sopenharmony_ci	bool do_lazy = !info->freed_tables;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	trace_hyperv_mmu_flush_tlb_multi(cpus, info);
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	if (!hv_hypercall_pg)
7262306a36Sopenharmony_ci		goto do_native;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	local_irq_save(flags);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	if (unlikely(!flush)) {
7962306a36Sopenharmony_ci		local_irq_restore(flags);
8062306a36Sopenharmony_ci		goto do_native;
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (info->mm) {
8462306a36Sopenharmony_ci		/*
8562306a36Sopenharmony_ci		 * AddressSpace argument must match the CR3 with PCID bits
8662306a36Sopenharmony_ci		 * stripped out.
8762306a36Sopenharmony_ci		 */
8862306a36Sopenharmony_ci		flush->address_space = virt_to_phys(info->mm->pgd);
8962306a36Sopenharmony_ci		flush->address_space &= CR3_ADDR_MASK;
9062306a36Sopenharmony_ci		flush->flags = 0;
9162306a36Sopenharmony_ci	} else {
9262306a36Sopenharmony_ci		flush->address_space = 0;
9362306a36Sopenharmony_ci		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
9462306a36Sopenharmony_ci	}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	flush->processor_mask = 0;
9762306a36Sopenharmony_ci	if (cpumask_equal(cpus, cpu_present_mask)) {
9862306a36Sopenharmony_ci		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
9962306a36Sopenharmony_ci	} else {
10062306a36Sopenharmony_ci		/*
10162306a36Sopenharmony_ci		 * From the supplied CPU set we need to figure out if we can get
10262306a36Sopenharmony_ci		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
10362306a36Sopenharmony_ci		 * hypercalls. This is possible when the highest VP number in
10462306a36Sopenharmony_ci		 * the set is < 64. As VP numbers are usually in ascending order
10562306a36Sopenharmony_ci		 * and match Linux CPU ids, here is an optimization: we check
10662306a36Sopenharmony_ci		 * the VP number for the highest bit in the supplied set first
10762306a36Sopenharmony_ci		 * so we can quickly find out if using *_EX hypercalls is a
10862306a36Sopenharmony_ci		 * must. We will also check all VP numbers when walking the
10962306a36Sopenharmony_ci		 * supplied CPU set to remain correct in all cases.
11062306a36Sopenharmony_ci		 */
11162306a36Sopenharmony_ci		cpu = cpumask_last(cpus);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci		if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
11462306a36Sopenharmony_ci			goto do_ex_hypercall;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci		for_each_cpu(cpu, cpus) {
11762306a36Sopenharmony_ci			if (do_lazy && cpu_is_lazy(cpu))
11862306a36Sopenharmony_ci				continue;
11962306a36Sopenharmony_ci			vcpu = hv_cpu_number_to_vp_number(cpu);
12062306a36Sopenharmony_ci			if (vcpu == VP_INVAL) {
12162306a36Sopenharmony_ci				local_irq_restore(flags);
12262306a36Sopenharmony_ci				goto do_native;
12362306a36Sopenharmony_ci			}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci			if (vcpu >= 64)
12662306a36Sopenharmony_ci				goto do_ex_hypercall;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci			__set_bit(vcpu, (unsigned long *)
12962306a36Sopenharmony_ci				  &flush->processor_mask);
13062306a36Sopenharmony_ci		}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci		/* nothing to flush if 'processor_mask' ends up being empty */
13362306a36Sopenharmony_ci		if (!flush->processor_mask) {
13462306a36Sopenharmony_ci			local_irq_restore(flags);
13562306a36Sopenharmony_ci			return;
13662306a36Sopenharmony_ci		}
13762306a36Sopenharmony_ci	}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	/*
14062306a36Sopenharmony_ci	 * We can flush not more than max_gvas with one hypercall. Flush the
14162306a36Sopenharmony_ci	 * whole address space if we were asked to do more.
14262306a36Sopenharmony_ci	 */
14362306a36Sopenharmony_ci	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	if (info->end == TLB_FLUSH_ALL) {
14662306a36Sopenharmony_ci		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
14762306a36Sopenharmony_ci		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
14862306a36Sopenharmony_ci					 flush, NULL);
14962306a36Sopenharmony_ci	} else if (info->end &&
15062306a36Sopenharmony_ci		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
15162306a36Sopenharmony_ci		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
15262306a36Sopenharmony_ci					 flush, NULL);
15362306a36Sopenharmony_ci	} else {
15462306a36Sopenharmony_ci		gva_n = fill_gva_list(flush->gva_list, 0,
15562306a36Sopenharmony_ci				      info->start, info->end);
15662306a36Sopenharmony_ci		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
15762306a36Sopenharmony_ci					     gva_n, 0, flush, NULL);
15862306a36Sopenharmony_ci	}
15962306a36Sopenharmony_ci	goto check_status;
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cido_ex_hypercall:
16262306a36Sopenharmony_ci	status = hyperv_flush_tlb_others_ex(cpus, info);
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_cicheck_status:
16562306a36Sopenharmony_ci	local_irq_restore(flags);
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	if (hv_result_success(status))
16862306a36Sopenharmony_ci		return;
16962306a36Sopenharmony_cido_native:
17062306a36Sopenharmony_ci	native_flush_tlb_multi(cpus, info);
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
17462306a36Sopenharmony_ci				      const struct flush_tlb_info *info)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	int nr_bank = 0, max_gvas, gva_n;
17762306a36Sopenharmony_ci	struct hv_tlb_flush_ex *flush;
17862306a36Sopenharmony_ci	u64 status;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
18162306a36Sopenharmony_ci		return HV_STATUS_INVALID_PARAMETER;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	if (info->mm) {
18662306a36Sopenharmony_ci		/*
18762306a36Sopenharmony_ci		 * AddressSpace argument must match the CR3 with PCID bits
18862306a36Sopenharmony_ci		 * stripped out.
18962306a36Sopenharmony_ci		 */
19062306a36Sopenharmony_ci		flush->address_space = virt_to_phys(info->mm->pgd);
19162306a36Sopenharmony_ci		flush->address_space &= CR3_ADDR_MASK;
19262306a36Sopenharmony_ci		flush->flags = 0;
19362306a36Sopenharmony_ci	} else {
19462306a36Sopenharmony_ci		flush->address_space = 0;
19562306a36Sopenharmony_ci		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
19662306a36Sopenharmony_ci	}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	flush->hv_vp_set.valid_bank_mask = 0;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
20162306a36Sopenharmony_ci	nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
20262306a36Sopenharmony_ci			info->freed_tables ? NULL : cpu_is_lazy);
20362306a36Sopenharmony_ci	if (nr_bank < 0)
20462306a36Sopenharmony_ci		return HV_STATUS_INVALID_PARAMETER;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	/*
20762306a36Sopenharmony_ci	 * We can flush not more than max_gvas with one hypercall. Flush the
20862306a36Sopenharmony_ci	 * whole address space if we were asked to do more.
20962306a36Sopenharmony_ci	 */
21062306a36Sopenharmony_ci	max_gvas =
21162306a36Sopenharmony_ci		(PAGE_SIZE - sizeof(*flush) - nr_bank *
21262306a36Sopenharmony_ci		 sizeof(flush->hv_vp_set.bank_contents[0])) /
21362306a36Sopenharmony_ci		sizeof(flush->gva_list[0]);
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	if (info->end == TLB_FLUSH_ALL) {
21662306a36Sopenharmony_ci		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
21762306a36Sopenharmony_ci		status = hv_do_rep_hypercall(
21862306a36Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
21962306a36Sopenharmony_ci			0, nr_bank, flush, NULL);
22062306a36Sopenharmony_ci	} else if (info->end &&
22162306a36Sopenharmony_ci		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
22262306a36Sopenharmony_ci		status = hv_do_rep_hypercall(
22362306a36Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
22462306a36Sopenharmony_ci			0, nr_bank, flush, NULL);
22562306a36Sopenharmony_ci	} else {
22662306a36Sopenharmony_ci		gva_n = fill_gva_list(flush->gva_list, nr_bank,
22762306a36Sopenharmony_ci				      info->start, info->end);
22862306a36Sopenharmony_ci		status = hv_do_rep_hypercall(
22962306a36Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
23062306a36Sopenharmony_ci			gva_n, nr_bank, flush, NULL);
23162306a36Sopenharmony_ci	}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	return status;
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_civoid hyperv_setup_mmu_ops(void)
23762306a36Sopenharmony_ci{
23862306a36Sopenharmony_ci	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
23962306a36Sopenharmony_ci		return;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	pr_info("Using hypercall for remote TLB flush\n");
24262306a36Sopenharmony_ci	pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
24362306a36Sopenharmony_ci	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
24462306a36Sopenharmony_ci}
245