// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for flushing entries from the
 * TLB and MMU hash table.
 *
 * Derived from arch/ppc64/mm/init.c:
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *   Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *   and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *     Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *   Rework for PPC64 port.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
#include <asm/pte-walk.h>

#include <trace/events/thp.h>

DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
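/*
 * The per-CPU batch above accumulates (vpn, pte) pairs so that many hash
 * invalidations can be flushed in one go rather than one at a time. The
 * batch only collects entries while it is marked active, which for the
 * hash MMU happens between arch_enter_lazy_mmu_mode() and
 * arch_leave_lazy_mmu_mode(); the latter drains anything still pending
 * through __flush_tlb_pending(). A minimal sketch of the batched
 * pattern, for illustration only:
 *
 *	arch_enter_lazy_mmu_mode();
 *	// ... change some PTEs; each change ends up in hpte_need_flush()
 *	arch_leave_lazy_mmu_mode();	// flushes whatever was batched
 */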
/*
 * A linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, unsigned long pte, int huge)
{
	unsigned long vpn;
	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
	unsigned long vsid;
	unsigned int psize;
	int ssize;
	real_pte_t rpte;
	int i, offset;

	i = batch->index;

	/*
	 * Get page size (maybe move back to caller).
	 *
	 * NOTE: when using special 64K mappings in a 4K environment, like
	 * for SPEs, we obtain the page size from the slice, which therefore
	 * must still exist (and thus the VMA must not have been reused) at
	 * the time of this call.
	 */
	if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
		psize = get_slice_psize(mm, addr);
		/* Mask the address for the correct page size */
		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
		if (unlikely(psize == MMU_PAGE_16G))
			offset = PTRS_PER_PUD;
		else
			offset = PTRS_PER_PMD;
#else
		BUG();
		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
	} else {
		psize = pte_pagesize_index(mm, addr, pte);
		/*
		 * Mask the address for the standard page size. If we
		 * have a 64k page kernel, but the hardware does not
		 * support 64k pages, this might be different from the
		 * hardware page size encoded in the slice table.
		 */
		addr &= PAGE_MASK;
		offset = PTRS_PER_PTE;
	}

	/* Build full vaddr */
	if (!is_kernel_addr(addr)) {
		ssize = user_segment_size(addr);
		vsid = get_user_vsid(&mm->context, addr, ssize);
	} else {
		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}
	WARN_ON(vsid == 0);
	vpn = hpt_vpn(addr, vsid, ssize);
	rpte = __real_pte(__pte(pte), ptep, offset);

	/*
	 * Check if we have an active batch on this CPU. If not, just
	 * flush now and return.
	 */
	if (!batch->active) {
		flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
		put_cpu_var(ppc64_tlb_batch);
		return;
	}

	/*
	 * This can happen when we are in the middle of a TLB batch and
	 * we encounter memory pressure (e.g. copy_page_range() when it
	 * tries to allocate a new pte). If we have to reclaim memory and
	 * end up scanning and resetting referenced bits then our batch
	 * context will change mid stream.
	 *
	 * We also need to ensure only one page size is present in a given
	 * batch.
	 */
	if (i != 0 && (mm != batch->mm || batch->psize != psize ||
		       batch->ssize != ssize)) {
		__flush_tlb_pending(batch);
		i = 0;
	}
	if (i == 0) {
		batch->mm = mm;
		batch->psize = psize;
		batch->ssize = ssize;
	}
	batch->pte[i] = rpte;
	batch->vpn[i] = vpn;
	batch->index = ++i;
	if (i >= PPC64_TLB_BATCH_NR)
		__flush_tlb_pending(batch);
	put_cpu_var(ppc64_tlb_batch);
}
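/*
 * Illustrative call-site sketch (not a real function in this file): a
 * hash-MMU pte_update() style helper pairs a PTE modification with
 * hpte_need_flush() whenever the old PTE carried a hash entry, roughly:
 *
 *	old = pte_update(mm, addr, ptep, clr, set, huge);
 *	if (old & H_PAGE_HASHPTE)
 *		hpte_need_flush(mm, addr, ptep, old, huge);
 *
 * This is essentially the pattern used by hash__pte_update().
 */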
/*
 * This function is called when terminating an mmu batch or when a batch
 * is full. It will perform the flush of all the entries currently stored
 * in a batch.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
	int i, local;

	i = batch->index;
	local = mm_is_thread_local(batch->mm);
	if (i == 1)
		flush_hash_page(batch->vpn[0], batch->pte[0],
				batch->psize, batch->ssize, local);
	else
		flush_hash_range(i, local);
	batch->index = 0;
}
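/*
 * hash__tlb_flush() below is the hash side of the mmu_gather tear-down:
 * by the time it runs, the generic code has already cleared the linux
 * PTEs and is about to free the pages, so a pending batch must be
 * drained here. The arch dispatch looks roughly like this (paraphrased
 * sketch; radix_enabled() and radix__tlb_flush() belong to the radix
 * MMU code):
 *
 *	static inline void tlb_flush(struct mmu_gather *tlb)
 *	{
 *		if (radix_enabled())
 *			radix__tlb_flush(tlb);
 *		else
 *			hash__tlb_flush(tlb);
 *	}
 */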
void hash__tlb_flush(struct mmu_gather *tlb)
{
	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);

	/*
	 * If there's a TLB batch pending, then we must flush it because the
	 * pages are going to be freed and we really don't want to have a CPU
	 * access a freed page because it still has a stale TLB entry.
	 */
	if (tlbbatch->index)
		__flush_tlb_pending(tlbbatch);

	put_cpu_var(ppc64_tlb_batch);
}

/**
 * __flush_hash_table_range - Flush all HPTEs for a given address range
 *                            from the hash table (and the TLB), while
 *                            keeping the linux PTEs intact.
 * @start: starting address
 * @end: ending address (not included in the flush)
 *
 * This function is mostly to be used by some IO hotplug code in order
 * to remove all hash entries from a given address range used to map IO
 * space on a removed PCI-PCI bridge without tearing down the full mapping,
 * since 64K kernel pages may overlap with other bridges when using 64K
 * pages with 4K HW pages on IO space.
 *
 * Because of that usage pattern, it is optimized for small size rather
 * than speed.
 */
void __flush_hash_table_range(unsigned long start, unsigned long end)
{
	int hugepage_shift;
	unsigned long flags;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = ALIGN(end, PAGE_SIZE);

	/*
	 * Note: Normally, we should only ever use a batch within a
	 * PTE locked section. This violates the rule, but will work
	 * since we don't actually modify the PTEs, we just flush the
	 * hash while leaving the PTEs intact (including their reference
	 * to being hashed). This is not the most performance oriented
	 * way to do things but is fine for our needs here.
	 */
	local_irq_save(flags);
	arch_enter_lazy_mmu_mode();
	for (; start < end; start += PAGE_SIZE) {
		pte_t *ptep = find_init_mm_pte(start, &hugepage_shift);
		unsigned long pte;

		if (ptep == NULL)
			continue;
		pte = pte_val(*ptep);
		if (!(pte & H_PAGE_HASHPTE))
			continue;
		hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift);
	}
	arch_leave_lazy_mmu_mode();
	local_irq_restore(flags);
}
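/*
 * Illustrative use of __flush_hash_table_range() (hypothetical variables,
 * not a real call site): tearing down the hash entries for an IO window
 * that was mapped into the kernel page tables, without unmapping it:
 *
 *	__flush_hash_table_range(io_start, io_start + io_size);
 *
 * Subsequent accesses through the range simply fault the HPTEs back in
 * from the still-intact linux PTEs.
 */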
void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;
	pte_t *start_pte;
	unsigned long flags;

	addr = ALIGN_DOWN(addr, PMD_SIZE);
	/*
	 * Note: as in __flush_hash_table_range() above, using a batch
	 * outside of a PTE locked section violates the usual rule, but
	 * works here because we only flush the hash and leave the PTEs
	 * intact (including their reference to being hashed). This is
	 * not the most performance oriented way to do things but is
	 * fine for our needs here.
	 */
	local_irq_save(flags);
	arch_enter_lazy_mmu_mode();
	start_pte = pte_offset_map(pmd, addr);
	for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
		unsigned long pteval = pte_val(*pte);

		if (pteval & H_PAGE_HASHPTE)
			hpte_need_flush(mm, addr, pte, pteval, 0);
		addr += PAGE_SIZE;
	}
	arch_leave_lazy_mmu_mode();
	local_irq_restore(flags);
}
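/*
 * flush_tlb_pmd_range() above walks one page-table page worth of PTEs
 * and queues a hash flush for every entry that is still hashed. A
 * caller collapsing 4K subpages into a hugepage, for instance, could
 * invalidate all HPTEs under the old pmd with (illustrative sketch,
 * "pmdp" and "address" are hypothetical):
 *
 *	flush_tlb_pmd_range(vma->vm_mm, pmdp, address);
 *
 * leaving the linux PTEs untouched.
 */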