18c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "Hyper-V: " fmt 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci#include <linux/hyperv.h> 48c2ecf20Sopenharmony_ci#include <linux/log2.h> 58c2ecf20Sopenharmony_ci#include <linux/slab.h> 68c2ecf20Sopenharmony_ci#include <linux/types.h> 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <asm/fpu/api.h> 98c2ecf20Sopenharmony_ci#include <asm/mshyperv.h> 108c2ecf20Sopenharmony_ci#include <asm/msr.h> 118c2ecf20Sopenharmony_ci#include <asm/tlbflush.h> 128c2ecf20Sopenharmony_ci#include <asm/tlb.h> 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 158c2ecf20Sopenharmony_ci#include <asm/trace/hyperv.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci/* Each gva in gva_list encodes up to 4096 pages to flush */ 188c2ecf20Sopenharmony_ci#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, 218c2ecf20Sopenharmony_ci const struct flush_tlb_info *info); 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci/* 248c2ecf20Sopenharmony_ci * Fills in gva_list starting from offset. Returns the number of items added. 258c2ecf20Sopenharmony_ci */ 268c2ecf20Sopenharmony_cistatic inline int fill_gva_list(u64 gva_list[], int offset, 278c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 288c2ecf20Sopenharmony_ci{ 298c2ecf20Sopenharmony_ci int gva_n = offset; 308c2ecf20Sopenharmony_ci unsigned long cur = start, diff; 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci do { 338c2ecf20Sopenharmony_ci diff = end > cur ? end - cur : 0; 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci gva_list[gva_n] = cur & PAGE_MASK; 368c2ecf20Sopenharmony_ci /* 378c2ecf20Sopenharmony_ci * Lower 12 bits encode the number of additional 388c2ecf20Sopenharmony_ci * pages to flush (in addition to the 'cur' page). 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_ci if (diff >= HV_TLB_FLUSH_UNIT) { 418c2ecf20Sopenharmony_ci gva_list[gva_n] |= ~PAGE_MASK; 428c2ecf20Sopenharmony_ci cur += HV_TLB_FLUSH_UNIT; 438c2ecf20Sopenharmony_ci } else if (diff) { 448c2ecf20Sopenharmony_ci gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; 458c2ecf20Sopenharmony_ci cur = end; 468c2ecf20Sopenharmony_ci } 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci gva_n++; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci } while (cur < end); 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci return gva_n - offset; 538c2ecf20Sopenharmony_ci} 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_cistatic void hyperv_flush_tlb_others(const struct cpumask *cpus, 568c2ecf20Sopenharmony_ci const struct flush_tlb_info *info) 578c2ecf20Sopenharmony_ci{ 588c2ecf20Sopenharmony_ci int cpu, vcpu, gva_n, max_gvas; 598c2ecf20Sopenharmony_ci struct hv_tlb_flush **flush_pcpu; 608c2ecf20Sopenharmony_ci struct hv_tlb_flush *flush; 618c2ecf20Sopenharmony_ci u64 status = U64_MAX; 628c2ecf20Sopenharmony_ci unsigned long flags; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci trace_hyperv_mmu_flush_tlb_others(cpus, info); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci if (!hv_hypercall_pg) 678c2ecf20Sopenharmony_ci goto do_native; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci local_irq_save(flags); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci /* 728c2ecf20Sopenharmony_ci * Only check the mask _after_ interrupt has been disabled to avoid the 738c2ecf20Sopenharmony_ci * mask changing under our feet. 748c2ecf20Sopenharmony_ci */ 758c2ecf20Sopenharmony_ci if (cpumask_empty(cpus)) { 768c2ecf20Sopenharmony_ci local_irq_restore(flags); 778c2ecf20Sopenharmony_ci return; 788c2ecf20Sopenharmony_ci } 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci flush_pcpu = (struct hv_tlb_flush **) 818c2ecf20Sopenharmony_ci this_cpu_ptr(hyperv_pcpu_input_arg); 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci flush = *flush_pcpu; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci if (unlikely(!flush)) { 868c2ecf20Sopenharmony_ci local_irq_restore(flags); 878c2ecf20Sopenharmony_ci goto do_native; 888c2ecf20Sopenharmony_ci } 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci if (info->mm) { 918c2ecf20Sopenharmony_ci /* 928c2ecf20Sopenharmony_ci * AddressSpace argument must match the CR3 with PCID bits 938c2ecf20Sopenharmony_ci * stripped out. 948c2ecf20Sopenharmony_ci */ 958c2ecf20Sopenharmony_ci flush->address_space = virt_to_phys(info->mm->pgd); 968c2ecf20Sopenharmony_ci flush->address_space &= CR3_ADDR_MASK; 978c2ecf20Sopenharmony_ci flush->flags = 0; 988c2ecf20Sopenharmony_ci } else { 998c2ecf20Sopenharmony_ci flush->address_space = 0; 1008c2ecf20Sopenharmony_ci flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; 1018c2ecf20Sopenharmony_ci } 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci flush->processor_mask = 0; 1048c2ecf20Sopenharmony_ci if (cpumask_equal(cpus, cpu_present_mask)) { 1058c2ecf20Sopenharmony_ci flush->flags |= HV_FLUSH_ALL_PROCESSORS; 1068c2ecf20Sopenharmony_ci } else { 1078c2ecf20Sopenharmony_ci /* 1088c2ecf20Sopenharmony_ci * From the supplied CPU set we need to figure out if we can get 1098c2ecf20Sopenharmony_ci * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} 1108c2ecf20Sopenharmony_ci * hypercalls. This is possible when the highest VP number in 1118c2ecf20Sopenharmony_ci * the set is < 64. As VP numbers are usually in ascending order 1128c2ecf20Sopenharmony_ci * and match Linux CPU ids, here is an optimization: we check 1138c2ecf20Sopenharmony_ci * the VP number for the highest bit in the supplied set first 1148c2ecf20Sopenharmony_ci * so we can quickly find out if using *_EX hypercalls is a 1158c2ecf20Sopenharmony_ci * must. We will also check all VP numbers when walking the 1168c2ecf20Sopenharmony_ci * supplied CPU set to remain correct in all cases. 1178c2ecf20Sopenharmony_ci */ 1188c2ecf20Sopenharmony_ci if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64) 1198c2ecf20Sopenharmony_ci goto do_ex_hypercall; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci for_each_cpu(cpu, cpus) { 1228c2ecf20Sopenharmony_ci vcpu = hv_cpu_number_to_vp_number(cpu); 1238c2ecf20Sopenharmony_ci if (vcpu == VP_INVAL) { 1248c2ecf20Sopenharmony_ci local_irq_restore(flags); 1258c2ecf20Sopenharmony_ci goto do_native; 1268c2ecf20Sopenharmony_ci } 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci if (vcpu >= 64) 1298c2ecf20Sopenharmony_ci goto do_ex_hypercall; 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci __set_bit(vcpu, (unsigned long *) 1328c2ecf20Sopenharmony_ci &flush->processor_mask); 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci /* 1378c2ecf20Sopenharmony_ci * We can flush not more than max_gvas with one hypercall. Flush the 1388c2ecf20Sopenharmony_ci * whole address space if we were asked to do more. 1398c2ecf20Sopenharmony_ci */ 1408c2ecf20Sopenharmony_ci max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci if (info->end == TLB_FLUSH_ALL) { 1438c2ecf20Sopenharmony_ci flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; 1448c2ecf20Sopenharmony_ci status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 1458c2ecf20Sopenharmony_ci flush, NULL); 1468c2ecf20Sopenharmony_ci } else if (info->end && 1478c2ecf20Sopenharmony_ci ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { 1488c2ecf20Sopenharmony_ci status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 1498c2ecf20Sopenharmony_ci flush, NULL); 1508c2ecf20Sopenharmony_ci } else { 1518c2ecf20Sopenharmony_ci gva_n = fill_gva_list(flush->gva_list, 0, 1528c2ecf20Sopenharmony_ci info->start, info->end); 1538c2ecf20Sopenharmony_ci status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, 1548c2ecf20Sopenharmony_ci gva_n, 0, flush, NULL); 1558c2ecf20Sopenharmony_ci } 1568c2ecf20Sopenharmony_ci goto check_status; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_cido_ex_hypercall: 1598c2ecf20Sopenharmony_ci status = hyperv_flush_tlb_others_ex(cpus, info); 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_cicheck_status: 1628c2ecf20Sopenharmony_ci local_irq_restore(flags); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci if (!(status & HV_HYPERCALL_RESULT_MASK)) 1658c2ecf20Sopenharmony_ci return; 1668c2ecf20Sopenharmony_cido_native: 1678c2ecf20Sopenharmony_ci native_flush_tlb_others(cpus, info); 1688c2ecf20Sopenharmony_ci} 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, 1718c2ecf20Sopenharmony_ci const struct flush_tlb_info *info) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci int nr_bank = 0, max_gvas, gva_n; 1748c2ecf20Sopenharmony_ci struct hv_tlb_flush_ex **flush_pcpu; 1758c2ecf20Sopenharmony_ci struct hv_tlb_flush_ex *flush; 1768c2ecf20Sopenharmony_ci u64 status; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) 1798c2ecf20Sopenharmony_ci return U64_MAX; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci flush_pcpu = (struct hv_tlb_flush_ex **) 1828c2ecf20Sopenharmony_ci this_cpu_ptr(hyperv_pcpu_input_arg); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci flush = *flush_pcpu; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci if (info->mm) { 1878c2ecf20Sopenharmony_ci /* 1888c2ecf20Sopenharmony_ci * AddressSpace argument must match the CR3 with PCID bits 1898c2ecf20Sopenharmony_ci * stripped out. 1908c2ecf20Sopenharmony_ci */ 1918c2ecf20Sopenharmony_ci flush->address_space = virt_to_phys(info->mm->pgd); 1928c2ecf20Sopenharmony_ci flush->address_space &= CR3_ADDR_MASK; 1938c2ecf20Sopenharmony_ci flush->flags = 0; 1948c2ecf20Sopenharmony_ci } else { 1958c2ecf20Sopenharmony_ci flush->address_space = 0; 1968c2ecf20Sopenharmony_ci flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; 1978c2ecf20Sopenharmony_ci } 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci flush->hv_vp_set.valid_bank_mask = 0; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; 2028c2ecf20Sopenharmony_ci nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); 2038c2ecf20Sopenharmony_ci if (nr_bank < 0) 2048c2ecf20Sopenharmony_ci return U64_MAX; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci /* 2078c2ecf20Sopenharmony_ci * We can flush not more than max_gvas with one hypercall. Flush the 2088c2ecf20Sopenharmony_ci * whole address space if we were asked to do more. 2098c2ecf20Sopenharmony_ci */ 2108c2ecf20Sopenharmony_ci max_gvas = 2118c2ecf20Sopenharmony_ci (PAGE_SIZE - sizeof(*flush) - nr_bank * 2128c2ecf20Sopenharmony_ci sizeof(flush->hv_vp_set.bank_contents[0])) / 2138c2ecf20Sopenharmony_ci sizeof(flush->gva_list[0]); 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci if (info->end == TLB_FLUSH_ALL) { 2168c2ecf20Sopenharmony_ci flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; 2178c2ecf20Sopenharmony_ci status = hv_do_rep_hypercall( 2188c2ecf20Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 2198c2ecf20Sopenharmony_ci 0, nr_bank, flush, NULL); 2208c2ecf20Sopenharmony_ci } else if (info->end && 2218c2ecf20Sopenharmony_ci ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { 2228c2ecf20Sopenharmony_ci status = hv_do_rep_hypercall( 2238c2ecf20Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 2248c2ecf20Sopenharmony_ci 0, nr_bank, flush, NULL); 2258c2ecf20Sopenharmony_ci } else { 2268c2ecf20Sopenharmony_ci gva_n = fill_gva_list(flush->gva_list, nr_bank, 2278c2ecf20Sopenharmony_ci info->start, info->end); 2288c2ecf20Sopenharmony_ci status = hv_do_rep_hypercall( 2298c2ecf20Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, 2308c2ecf20Sopenharmony_ci gva_n, nr_bank, flush, NULL); 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci return status; 2348c2ecf20Sopenharmony_ci} 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_civoid hyperv_setup_mmu_ops(void) 2378c2ecf20Sopenharmony_ci{ 2388c2ecf20Sopenharmony_ci if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) 2398c2ecf20Sopenharmony_ci return; 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci pr_info("Using hypercall for remote TLB flush\n"); 2428c2ecf20Sopenharmony_ci pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others; 2438c2ecf20Sopenharmony_ci pv_ops.mmu.tlb_remove_table = tlb_remove_table; 2448c2ecf20Sopenharmony_ci} 245