162306a36Sopenharmony_ci#define pr_fmt(fmt) "Hyper-V: " fmt 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/hyperv.h> 462306a36Sopenharmony_ci#include <linux/log2.h> 562306a36Sopenharmony_ci#include <linux/slab.h> 662306a36Sopenharmony_ci#include <linux/types.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <asm/fpu/api.h> 962306a36Sopenharmony_ci#include <asm/mshyperv.h> 1062306a36Sopenharmony_ci#include <asm/msr.h> 1162306a36Sopenharmony_ci#include <asm/tlbflush.h> 1262306a36Sopenharmony_ci#include <asm/tlb.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define CREATE_TRACE_POINTS 1562306a36Sopenharmony_ci#include <asm/trace/hyperv.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci/* Each gva in gva_list encodes up to 4096 pages to flush */ 1862306a36Sopenharmony_ci#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, 2162306a36Sopenharmony_ci const struct flush_tlb_info *info); 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci/* 2462306a36Sopenharmony_ci * Fills in gva_list starting from offset. Returns the number of items added. 2562306a36Sopenharmony_ci */ 2662306a36Sopenharmony_cistatic inline int fill_gva_list(u64 gva_list[], int offset, 2762306a36Sopenharmony_ci unsigned long start, unsigned long end) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci int gva_n = offset; 3062306a36Sopenharmony_ci unsigned long cur = start, diff; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci do { 3362306a36Sopenharmony_ci diff = end > cur ? end - cur : 0; 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci gva_list[gva_n] = cur & PAGE_MASK; 3662306a36Sopenharmony_ci /* 3762306a36Sopenharmony_ci * Lower 12 bits encode the number of additional 3862306a36Sopenharmony_ci * pages to flush (in addition to the 'cur' page). 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_ci if (diff >= HV_TLB_FLUSH_UNIT) { 4162306a36Sopenharmony_ci gva_list[gva_n] |= ~PAGE_MASK; 4262306a36Sopenharmony_ci cur += HV_TLB_FLUSH_UNIT; 4362306a36Sopenharmony_ci } else if (diff) { 4462306a36Sopenharmony_ci gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; 4562306a36Sopenharmony_ci cur = end; 4662306a36Sopenharmony_ci } 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci gva_n++; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci } while (cur < end); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci return gva_n - offset; 5362306a36Sopenharmony_ci} 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_cistatic bool cpu_is_lazy(int cpu) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci return per_cpu(cpu_tlbstate_shared.is_lazy, cpu); 5862306a36Sopenharmony_ci} 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic void hyperv_flush_tlb_multi(const struct cpumask *cpus, 6162306a36Sopenharmony_ci const struct flush_tlb_info *info) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci int cpu, vcpu, gva_n, max_gvas; 6462306a36Sopenharmony_ci struct hv_tlb_flush *flush; 6562306a36Sopenharmony_ci u64 status; 6662306a36Sopenharmony_ci unsigned long flags; 6762306a36Sopenharmony_ci bool do_lazy = !info->freed_tables; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci trace_hyperv_mmu_flush_tlb_multi(cpus, info); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci if (!hv_hypercall_pg) 7262306a36Sopenharmony_ci goto do_native; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci local_irq_save(flags); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci flush = *this_cpu_ptr(hyperv_pcpu_input_arg); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci if (unlikely(!flush)) { 7962306a36Sopenharmony_ci local_irq_restore(flags); 8062306a36Sopenharmony_ci goto do_native; 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci if (info->mm) { 8462306a36Sopenharmony_ci /* 8562306a36Sopenharmony_ci * AddressSpace argument must match the CR3 with PCID bits 8662306a36Sopenharmony_ci * stripped out. 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_ci flush->address_space = virt_to_phys(info->mm->pgd); 8962306a36Sopenharmony_ci flush->address_space &= CR3_ADDR_MASK; 9062306a36Sopenharmony_ci flush->flags = 0; 9162306a36Sopenharmony_ci } else { 9262306a36Sopenharmony_ci flush->address_space = 0; 9362306a36Sopenharmony_ci flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; 9462306a36Sopenharmony_ci } 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci flush->processor_mask = 0; 9762306a36Sopenharmony_ci if (cpumask_equal(cpus, cpu_present_mask)) { 9862306a36Sopenharmony_ci flush->flags |= HV_FLUSH_ALL_PROCESSORS; 9962306a36Sopenharmony_ci } else { 10062306a36Sopenharmony_ci /* 10162306a36Sopenharmony_ci * From the supplied CPU set we need to figure out if we can get 10262306a36Sopenharmony_ci * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} 10362306a36Sopenharmony_ci * hypercalls. This is possible when the highest VP number in 10462306a36Sopenharmony_ci * the set is < 64. As VP numbers are usually in ascending order 10562306a36Sopenharmony_ci * and match Linux CPU ids, here is an optimization: we check 10662306a36Sopenharmony_ci * the VP number for the highest bit in the supplied set first 10762306a36Sopenharmony_ci * so we can quickly find out if using *_EX hypercalls is a 10862306a36Sopenharmony_ci * must. We will also check all VP numbers when walking the 10962306a36Sopenharmony_ci * supplied CPU set to remain correct in all cases. 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_ci cpu = cpumask_last(cpus); 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64) 11462306a36Sopenharmony_ci goto do_ex_hypercall; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci for_each_cpu(cpu, cpus) { 11762306a36Sopenharmony_ci if (do_lazy && cpu_is_lazy(cpu)) 11862306a36Sopenharmony_ci continue; 11962306a36Sopenharmony_ci vcpu = hv_cpu_number_to_vp_number(cpu); 12062306a36Sopenharmony_ci if (vcpu == VP_INVAL) { 12162306a36Sopenharmony_ci local_irq_restore(flags); 12262306a36Sopenharmony_ci goto do_native; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci if (vcpu >= 64) 12662306a36Sopenharmony_ci goto do_ex_hypercall; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci __set_bit(vcpu, (unsigned long *) 12962306a36Sopenharmony_ci &flush->processor_mask); 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci /* nothing to flush if 'processor_mask' ends up being empty */ 13362306a36Sopenharmony_ci if (!flush->processor_mask) { 13462306a36Sopenharmony_ci local_irq_restore(flags); 13562306a36Sopenharmony_ci return; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci } 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci /* 14062306a36Sopenharmony_ci * We can flush not more than max_gvas with one hypercall. Flush the 14162306a36Sopenharmony_ci * whole address space if we were asked to do more. 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_ci max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci if (info->end == TLB_FLUSH_ALL) { 14662306a36Sopenharmony_ci flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; 14762306a36Sopenharmony_ci status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 14862306a36Sopenharmony_ci flush, NULL); 14962306a36Sopenharmony_ci } else if (info->end && 15062306a36Sopenharmony_ci ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { 15162306a36Sopenharmony_ci status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 15262306a36Sopenharmony_ci flush, NULL); 15362306a36Sopenharmony_ci } else { 15462306a36Sopenharmony_ci gva_n = fill_gva_list(flush->gva_list, 0, 15562306a36Sopenharmony_ci info->start, info->end); 15662306a36Sopenharmony_ci status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, 15762306a36Sopenharmony_ci gva_n, 0, flush, NULL); 15862306a36Sopenharmony_ci } 15962306a36Sopenharmony_ci goto check_status; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cido_ex_hypercall: 16262306a36Sopenharmony_ci status = hyperv_flush_tlb_others_ex(cpus, info); 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_cicheck_status: 16562306a36Sopenharmony_ci local_irq_restore(flags); 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci if (hv_result_success(status)) 16862306a36Sopenharmony_ci return; 16962306a36Sopenharmony_cido_native: 17062306a36Sopenharmony_ci native_flush_tlb_multi(cpus, info); 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_cistatic u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, 17462306a36Sopenharmony_ci const struct flush_tlb_info *info) 17562306a36Sopenharmony_ci{ 17662306a36Sopenharmony_ci int nr_bank = 0, max_gvas, gva_n; 17762306a36Sopenharmony_ci struct hv_tlb_flush_ex *flush; 17862306a36Sopenharmony_ci u64 status; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) 18162306a36Sopenharmony_ci return HV_STATUS_INVALID_PARAMETER; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci flush = *this_cpu_ptr(hyperv_pcpu_input_arg); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci if (info->mm) { 18662306a36Sopenharmony_ci /* 18762306a36Sopenharmony_ci * AddressSpace argument must match the CR3 with PCID bits 18862306a36Sopenharmony_ci * stripped out. 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci flush->address_space = virt_to_phys(info->mm->pgd); 19162306a36Sopenharmony_ci flush->address_space &= CR3_ADDR_MASK; 19262306a36Sopenharmony_ci flush->flags = 0; 19362306a36Sopenharmony_ci } else { 19462306a36Sopenharmony_ci flush->address_space = 0; 19562306a36Sopenharmony_ci flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci flush->hv_vp_set.valid_bank_mask = 0; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; 20162306a36Sopenharmony_ci nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus, 20262306a36Sopenharmony_ci info->freed_tables ? NULL : cpu_is_lazy); 20362306a36Sopenharmony_ci if (nr_bank < 0) 20462306a36Sopenharmony_ci return HV_STATUS_INVALID_PARAMETER; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci /* 20762306a36Sopenharmony_ci * We can flush not more than max_gvas with one hypercall. Flush the 20862306a36Sopenharmony_ci * whole address space if we were asked to do more. 20962306a36Sopenharmony_ci */ 21062306a36Sopenharmony_ci max_gvas = 21162306a36Sopenharmony_ci (PAGE_SIZE - sizeof(*flush) - nr_bank * 21262306a36Sopenharmony_ci sizeof(flush->hv_vp_set.bank_contents[0])) / 21362306a36Sopenharmony_ci sizeof(flush->gva_list[0]); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (info->end == TLB_FLUSH_ALL) { 21662306a36Sopenharmony_ci flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; 21762306a36Sopenharmony_ci status = hv_do_rep_hypercall( 21862306a36Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 21962306a36Sopenharmony_ci 0, nr_bank, flush, NULL); 22062306a36Sopenharmony_ci } else if (info->end && 22162306a36Sopenharmony_ci ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { 22262306a36Sopenharmony_ci status = hv_do_rep_hypercall( 22362306a36Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 22462306a36Sopenharmony_ci 0, nr_bank, flush, NULL); 22562306a36Sopenharmony_ci } else { 22662306a36Sopenharmony_ci gva_n = fill_gva_list(flush->gva_list, nr_bank, 22762306a36Sopenharmony_ci info->start, info->end); 22862306a36Sopenharmony_ci status = hv_do_rep_hypercall( 22962306a36Sopenharmony_ci HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, 23062306a36Sopenharmony_ci gva_n, nr_bank, flush, NULL); 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci return status; 23462306a36Sopenharmony_ci} 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_civoid hyperv_setup_mmu_ops(void) 23762306a36Sopenharmony_ci{ 23862306a36Sopenharmony_ci if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) 23962306a36Sopenharmony_ci return; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci pr_info("Using hypercall for remote TLB flush\n"); 24262306a36Sopenharmony_ci pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; 24362306a36Sopenharmony_ci pv_ops.mmu.tlb_remove_table = tlb_remove_table; 24462306a36Sopenharmony_ci} 245