18c2ecf20Sopenharmony_ci#define pr_fmt(fmt)  "Hyper-V: " fmt
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci#include <linux/hyperv.h>
48c2ecf20Sopenharmony_ci#include <linux/log2.h>
58c2ecf20Sopenharmony_ci#include <linux/slab.h>
68c2ecf20Sopenharmony_ci#include <linux/types.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <asm/fpu/api.h>
98c2ecf20Sopenharmony_ci#include <asm/mshyperv.h>
108c2ecf20Sopenharmony_ci#include <asm/msr.h>
118c2ecf20Sopenharmony_ci#include <asm/tlbflush.h>
128c2ecf20Sopenharmony_ci#include <asm/tlb.h>
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS
158c2ecf20Sopenharmony_ci#include <asm/trace/hyperv.h>
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci/* Each gva in gva_list encodes up to 4096 pages to flush */
188c2ecf20Sopenharmony_ci#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
218c2ecf20Sopenharmony_ci				      const struct flush_tlb_info *info);
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci/*
248c2ecf20Sopenharmony_ci * Fills in gva_list starting from offset. Returns the number of items added.
258c2ecf20Sopenharmony_ci */
268c2ecf20Sopenharmony_cistatic inline int fill_gva_list(u64 gva_list[], int offset,
278c2ecf20Sopenharmony_ci				unsigned long start, unsigned long end)
288c2ecf20Sopenharmony_ci{
298c2ecf20Sopenharmony_ci	int gva_n = offset;
308c2ecf20Sopenharmony_ci	unsigned long cur = start, diff;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	do {
338c2ecf20Sopenharmony_ci		diff = end > cur ? end - cur : 0;
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci		gva_list[gva_n] = cur & PAGE_MASK;
368c2ecf20Sopenharmony_ci		/*
378c2ecf20Sopenharmony_ci		 * Lower 12 bits encode the number of additional
388c2ecf20Sopenharmony_ci		 * pages to flush (in addition to the 'cur' page).
398c2ecf20Sopenharmony_ci		 */
408c2ecf20Sopenharmony_ci		if (diff >= HV_TLB_FLUSH_UNIT) {
418c2ecf20Sopenharmony_ci			gva_list[gva_n] |= ~PAGE_MASK;
428c2ecf20Sopenharmony_ci			cur += HV_TLB_FLUSH_UNIT;
438c2ecf20Sopenharmony_ci		}  else if (diff) {
448c2ecf20Sopenharmony_ci			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
458c2ecf20Sopenharmony_ci			cur = end;
468c2ecf20Sopenharmony_ci		}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci		gva_n++;
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	} while (cur < end);
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	return gva_n - offset;
538c2ecf20Sopenharmony_ci}
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic void hyperv_flush_tlb_others(const struct cpumask *cpus,
568c2ecf20Sopenharmony_ci				    const struct flush_tlb_info *info)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	int cpu, vcpu, gva_n, max_gvas;
598c2ecf20Sopenharmony_ci	struct hv_tlb_flush **flush_pcpu;
608c2ecf20Sopenharmony_ci	struct hv_tlb_flush *flush;
618c2ecf20Sopenharmony_ci	u64 status = U64_MAX;
628c2ecf20Sopenharmony_ci	unsigned long flags;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	trace_hyperv_mmu_flush_tlb_others(cpus, info);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	if (!hv_hypercall_pg)
678c2ecf20Sopenharmony_ci		goto do_native;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	local_irq_save(flags);
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	/*
728c2ecf20Sopenharmony_ci	 * Only check the mask _after_ interrupt has been disabled to avoid the
738c2ecf20Sopenharmony_ci	 * mask changing under our feet.
748c2ecf20Sopenharmony_ci	 */
758c2ecf20Sopenharmony_ci	if (cpumask_empty(cpus)) {
768c2ecf20Sopenharmony_ci		local_irq_restore(flags);
778c2ecf20Sopenharmony_ci		return;
788c2ecf20Sopenharmony_ci	}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	flush_pcpu = (struct hv_tlb_flush **)
818c2ecf20Sopenharmony_ci		     this_cpu_ptr(hyperv_pcpu_input_arg);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	flush = *flush_pcpu;
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	if (unlikely(!flush)) {
868c2ecf20Sopenharmony_ci		local_irq_restore(flags);
878c2ecf20Sopenharmony_ci		goto do_native;
888c2ecf20Sopenharmony_ci	}
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	if (info->mm) {
918c2ecf20Sopenharmony_ci		/*
928c2ecf20Sopenharmony_ci		 * AddressSpace argument must match the CR3 with PCID bits
938c2ecf20Sopenharmony_ci		 * stripped out.
948c2ecf20Sopenharmony_ci		 */
958c2ecf20Sopenharmony_ci		flush->address_space = virt_to_phys(info->mm->pgd);
968c2ecf20Sopenharmony_ci		flush->address_space &= CR3_ADDR_MASK;
978c2ecf20Sopenharmony_ci		flush->flags = 0;
988c2ecf20Sopenharmony_ci	} else {
998c2ecf20Sopenharmony_ci		flush->address_space = 0;
1008c2ecf20Sopenharmony_ci		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
1018c2ecf20Sopenharmony_ci	}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	flush->processor_mask = 0;
1048c2ecf20Sopenharmony_ci	if (cpumask_equal(cpus, cpu_present_mask)) {
1058c2ecf20Sopenharmony_ci		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
1068c2ecf20Sopenharmony_ci	} else {
1078c2ecf20Sopenharmony_ci		/*
1088c2ecf20Sopenharmony_ci		 * From the supplied CPU set we need to figure out if we can get
1098c2ecf20Sopenharmony_ci		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
1108c2ecf20Sopenharmony_ci		 * hypercalls. This is possible when the highest VP number in
1118c2ecf20Sopenharmony_ci		 * the set is < 64. As VP numbers are usually in ascending order
1128c2ecf20Sopenharmony_ci		 * and match Linux CPU ids, here is an optimization: we check
1138c2ecf20Sopenharmony_ci		 * the VP number for the highest bit in the supplied set first
1148c2ecf20Sopenharmony_ci		 * so we can quickly find out if using *_EX hypercalls is a
1158c2ecf20Sopenharmony_ci		 * must. We will also check all VP numbers when walking the
1168c2ecf20Sopenharmony_ci		 * supplied CPU set to remain correct in all cases.
1178c2ecf20Sopenharmony_ci		 */
1188c2ecf20Sopenharmony_ci		if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
1198c2ecf20Sopenharmony_ci			goto do_ex_hypercall;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci		for_each_cpu(cpu, cpus) {
1228c2ecf20Sopenharmony_ci			vcpu = hv_cpu_number_to_vp_number(cpu);
1238c2ecf20Sopenharmony_ci			if (vcpu == VP_INVAL) {
1248c2ecf20Sopenharmony_ci				local_irq_restore(flags);
1258c2ecf20Sopenharmony_ci				goto do_native;
1268c2ecf20Sopenharmony_ci			}
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci			if (vcpu >= 64)
1298c2ecf20Sopenharmony_ci				goto do_ex_hypercall;
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci			__set_bit(vcpu, (unsigned long *)
1328c2ecf20Sopenharmony_ci				  &flush->processor_mask);
1338c2ecf20Sopenharmony_ci		}
1348c2ecf20Sopenharmony_ci	}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	/*
1378c2ecf20Sopenharmony_ci	 * We can flush not more than max_gvas with one hypercall. Flush the
1388c2ecf20Sopenharmony_ci	 * whole address space if we were asked to do more.
1398c2ecf20Sopenharmony_ci	 */
1408c2ecf20Sopenharmony_ci	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	if (info->end == TLB_FLUSH_ALL) {
1438c2ecf20Sopenharmony_ci		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
1448c2ecf20Sopenharmony_ci		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
1458c2ecf20Sopenharmony_ci					 flush, NULL);
1468c2ecf20Sopenharmony_ci	} else if (info->end &&
1478c2ecf20Sopenharmony_ci		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
1488c2ecf20Sopenharmony_ci		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
1498c2ecf20Sopenharmony_ci					 flush, NULL);
1508c2ecf20Sopenharmony_ci	} else {
1518c2ecf20Sopenharmony_ci		gva_n = fill_gva_list(flush->gva_list, 0,
1528c2ecf20Sopenharmony_ci				      info->start, info->end);
1538c2ecf20Sopenharmony_ci		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
1548c2ecf20Sopenharmony_ci					     gva_n, 0, flush, NULL);
1558c2ecf20Sopenharmony_ci	}
1568c2ecf20Sopenharmony_ci	goto check_status;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cido_ex_hypercall:
1598c2ecf20Sopenharmony_ci	status = hyperv_flush_tlb_others_ex(cpus, info);
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_cicheck_status:
1628c2ecf20Sopenharmony_ci	local_irq_restore(flags);
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	if (!(status & HV_HYPERCALL_RESULT_MASK))
1658c2ecf20Sopenharmony_ci		return;
1668c2ecf20Sopenharmony_cido_native:
1678c2ecf20Sopenharmony_ci	native_flush_tlb_others(cpus, info);
1688c2ecf20Sopenharmony_ci}
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
1718c2ecf20Sopenharmony_ci				      const struct flush_tlb_info *info)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	int nr_bank = 0, max_gvas, gva_n;
1748c2ecf20Sopenharmony_ci	struct hv_tlb_flush_ex **flush_pcpu;
1758c2ecf20Sopenharmony_ci	struct hv_tlb_flush_ex *flush;
1768c2ecf20Sopenharmony_ci	u64 status;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
1798c2ecf20Sopenharmony_ci		return U64_MAX;
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	flush_pcpu = (struct hv_tlb_flush_ex **)
1828c2ecf20Sopenharmony_ci		     this_cpu_ptr(hyperv_pcpu_input_arg);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	flush = *flush_pcpu;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	if (info->mm) {
1878c2ecf20Sopenharmony_ci		/*
1888c2ecf20Sopenharmony_ci		 * AddressSpace argument must match the CR3 with PCID bits
1898c2ecf20Sopenharmony_ci		 * stripped out.
1908c2ecf20Sopenharmony_ci		 */
1918c2ecf20Sopenharmony_ci		flush->address_space = virt_to_phys(info->mm->pgd);
1928c2ecf20Sopenharmony_ci		flush->address_space &= CR3_ADDR_MASK;
1938c2ecf20Sopenharmony_ci		flush->flags = 0;
1948c2ecf20Sopenharmony_ci	} else {
1958c2ecf20Sopenharmony_ci		flush->address_space = 0;
1968c2ecf20Sopenharmony_ci		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
1978c2ecf20Sopenharmony_ci	}
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	flush->hv_vp_set.valid_bank_mask = 0;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
2028c2ecf20Sopenharmony_ci	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
2038c2ecf20Sopenharmony_ci	if (nr_bank < 0)
2048c2ecf20Sopenharmony_ci		return U64_MAX;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	/*
2078c2ecf20Sopenharmony_ci	 * We can flush not more than max_gvas with one hypercall. Flush the
2088c2ecf20Sopenharmony_ci	 * whole address space if we were asked to do more.
2098c2ecf20Sopenharmony_ci	 */
2108c2ecf20Sopenharmony_ci	max_gvas =
2118c2ecf20Sopenharmony_ci		(PAGE_SIZE - sizeof(*flush) - nr_bank *
2128c2ecf20Sopenharmony_ci		 sizeof(flush->hv_vp_set.bank_contents[0])) /
2138c2ecf20Sopenharmony_ci		sizeof(flush->gva_list[0]);
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	if (info->end == TLB_FLUSH_ALL) {
2168c2ecf20Sopenharmony_ci		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
2178c2ecf20Sopenharmony_ci		status = hv_do_rep_hypercall(
2188c2ecf20Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
2198c2ecf20Sopenharmony_ci			0, nr_bank, flush, NULL);
2208c2ecf20Sopenharmony_ci	} else if (info->end &&
2218c2ecf20Sopenharmony_ci		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
2228c2ecf20Sopenharmony_ci		status = hv_do_rep_hypercall(
2238c2ecf20Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
2248c2ecf20Sopenharmony_ci			0, nr_bank, flush, NULL);
2258c2ecf20Sopenharmony_ci	} else {
2268c2ecf20Sopenharmony_ci		gva_n = fill_gva_list(flush->gva_list, nr_bank,
2278c2ecf20Sopenharmony_ci				      info->start, info->end);
2288c2ecf20Sopenharmony_ci		status = hv_do_rep_hypercall(
2298c2ecf20Sopenharmony_ci			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
2308c2ecf20Sopenharmony_ci			gva_n, nr_bank, flush, NULL);
2318c2ecf20Sopenharmony_ci	}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	return status;
2348c2ecf20Sopenharmony_ci}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_civoid hyperv_setup_mmu_ops(void)
2378c2ecf20Sopenharmony_ci{
2388c2ecf20Sopenharmony_ci	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
2398c2ecf20Sopenharmony_ci		return;
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	pr_info("Using hypercall for remote TLB flush\n");
2428c2ecf20Sopenharmony_ci	pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
2438c2ecf20Sopenharmony_ci	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
2448c2ecf20Sopenharmony_ci}
245