162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2005, Paul Mackerras, IBM Corporation. 462306a36Sopenharmony_ci * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation. 562306a36Sopenharmony_ci * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/sched.h> 962306a36Sopenharmony_ci#include <linux/mm_types.h> 1062306a36Sopenharmony_ci#include <linux/mm.h> 1162306a36Sopenharmony_ci#include <linux/stop_machine.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <asm/sections.h> 1462306a36Sopenharmony_ci#include <asm/mmu.h> 1562306a36Sopenharmony_ci#include <asm/tlb.h> 1662306a36Sopenharmony_ci#include <asm/firmware.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <mm/mmu_decl.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <trace/events/thp.h> 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) 2362306a36Sopenharmony_ci#warning Limited user VSID range means pagetable space is wasted 2462306a36Sopenharmony_ci#endif 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * vmemmap is the starting address of the virtual address space where 2962306a36Sopenharmony_ci * struct pages are allocated for all possible PFNs present on the system 3062306a36Sopenharmony_ci * including holes and bad memory (hence sparse). These virtual struct 3162306a36Sopenharmony_ci * pages are stored in sequence in this virtual address space irrespective 3262306a36Sopenharmony_ci * of the fact whether the corresponding PFN is valid or not. This achieves 3362306a36Sopenharmony_ci * constant relationship between address of struct page and its PFN. 3462306a36Sopenharmony_ci * 3562306a36Sopenharmony_ci * During boot or memory hotplug operation when a new memory section is 3662306a36Sopenharmony_ci * added, physical memory allocation (including hash table bolting) will 3762306a36Sopenharmony_ci * be performed for the set of struct pages which are part of the memory 3862306a36Sopenharmony_ci * section. This saves memory by not allocating struct pages for PFNs 3962306a36Sopenharmony_ci * which are not valid. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * ---------------------------------------------- 4262306a36Sopenharmony_ci * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES| 4362306a36Sopenharmony_ci * ---------------------------------------------- 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * f000000000000000 c000000000000000 4662306a36Sopenharmony_ci * vmemmap +--------------+ +--------------+ 4762306a36Sopenharmony_ci * + | page struct | +--------------> | page struct | 4862306a36Sopenharmony_ci * | +--------------+ +--------------+ 4962306a36Sopenharmony_ci * | | page struct | +--------------> | page struct | 5062306a36Sopenharmony_ci * | +--------------+ | +--------------+ 5162306a36Sopenharmony_ci * | | page struct | + +------> | page struct | 5262306a36Sopenharmony_ci * | +--------------+ | +--------------+ 5362306a36Sopenharmony_ci * | | page struct | | +--> | page struct | 5462306a36Sopenharmony_ci * | +--------------+ | | +--------------+ 5562306a36Sopenharmony_ci * | | page struct | | | 5662306a36Sopenharmony_ci * | +--------------+ | | 5762306a36Sopenharmony_ci * | | page struct | | | 5862306a36Sopenharmony_ci * | +--------------+ | | 5962306a36Sopenharmony_ci * | | page struct | | | 6062306a36Sopenharmony_ci * | +--------------+ | | 6162306a36Sopenharmony_ci * | | page struct | | | 6262306a36Sopenharmony_ci * | +--------------+ | | 6362306a36Sopenharmony_ci * | | page struct | +-------+ | 6462306a36Sopenharmony_ci * | +--------------+ | 6562306a36Sopenharmony_ci * | | page struct | +-----------+ 6662306a36Sopenharmony_ci * | +--------------+ 6762306a36Sopenharmony_ci * | | page struct | No mapping 6862306a36Sopenharmony_ci * | +--------------+ 6962306a36Sopenharmony_ci * | | page struct | No mapping 7062306a36Sopenharmony_ci * v +--------------+ 7162306a36Sopenharmony_ci * 7262306a36Sopenharmony_ci * ----------------------------------------- 7362306a36Sopenharmony_ci * | RELATION BETWEEN STRUCT PAGES AND PFNS| 7462306a36Sopenharmony_ci * ----------------------------------------- 7562306a36Sopenharmony_ci * 7662306a36Sopenharmony_ci * vmemmap +--------------+ +---------------+ 7762306a36Sopenharmony_ci * + | page struct | +-------------> | PFN | 7862306a36Sopenharmony_ci * | +--------------+ +---------------+ 7962306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 8062306a36Sopenharmony_ci * | +--------------+ +---------------+ 8162306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 8262306a36Sopenharmony_ci * | +--------------+ +---------------+ 8362306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 8462306a36Sopenharmony_ci * | +--------------+ +---------------+ 8562306a36Sopenharmony_ci * | | | 8662306a36Sopenharmony_ci * | +--------------+ 8762306a36Sopenharmony_ci * | | | 8862306a36Sopenharmony_ci * | +--------------+ 8962306a36Sopenharmony_ci * | | | 9062306a36Sopenharmony_ci * | +--------------+ +---------------+ 9162306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 9262306a36Sopenharmony_ci * | +--------------+ +---------------+ 9362306a36Sopenharmony_ci * | | | 9462306a36Sopenharmony_ci * | +--------------+ 9562306a36Sopenharmony_ci * | | | 9662306a36Sopenharmony_ci * | +--------------+ +---------------+ 9762306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 9862306a36Sopenharmony_ci * | +--------------+ +---------------+ 9962306a36Sopenharmony_ci * | | page struct | +-------------> | PFN | 10062306a36Sopenharmony_ci * v +--------------+ +---------------+ 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_ci/* 10362306a36Sopenharmony_ci * On hash-based CPUs, the vmemmap is bolted in the hash table. 10462306a36Sopenharmony_ci * 10562306a36Sopenharmony_ci */ 10662306a36Sopenharmony_ciint __meminit hash__vmemmap_create_mapping(unsigned long start, 10762306a36Sopenharmony_ci unsigned long page_size, 10862306a36Sopenharmony_ci unsigned long phys) 10962306a36Sopenharmony_ci{ 11062306a36Sopenharmony_ci int rc; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci if ((start + page_size) >= H_VMEMMAP_END) { 11362306a36Sopenharmony_ci pr_warn("Outside the supported range\n"); 11462306a36Sopenharmony_ci return -1; 11562306a36Sopenharmony_ci } 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci rc = htab_bolt_mapping(start, start + page_size, phys, 11862306a36Sopenharmony_ci pgprot_val(PAGE_KERNEL), 11962306a36Sopenharmony_ci mmu_vmemmap_psize, mmu_kernel_ssize); 12062306a36Sopenharmony_ci if (rc < 0) { 12162306a36Sopenharmony_ci int rc2 = htab_remove_mapping(start, start + page_size, 12262306a36Sopenharmony_ci mmu_vmemmap_psize, 12362306a36Sopenharmony_ci mmu_kernel_ssize); 12462306a36Sopenharmony_ci BUG_ON(rc2 && (rc2 != -ENOENT)); 12562306a36Sopenharmony_ci } 12662306a36Sopenharmony_ci return rc; 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 13062306a36Sopenharmony_civoid hash__vmemmap_remove_mapping(unsigned long start, 13162306a36Sopenharmony_ci unsigned long page_size) 13262306a36Sopenharmony_ci{ 13362306a36Sopenharmony_ci int rc = htab_remove_mapping(start, start + page_size, 13462306a36Sopenharmony_ci mmu_vmemmap_psize, 13562306a36Sopenharmony_ci mmu_kernel_ssize); 13662306a36Sopenharmony_ci BUG_ON((rc < 0) && (rc != -ENOENT)); 13762306a36Sopenharmony_ci WARN_ON(rc == -ENOENT); 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci#endif 14062306a36Sopenharmony_ci#endif /* CONFIG_SPARSEMEM_VMEMMAP */ 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci/* 14362306a36Sopenharmony_ci * map_kernel_page currently only called by __ioremap 14462306a36Sopenharmony_ci * map_kernel_page adds an entry to the ioremap page table 14562306a36Sopenharmony_ci * and adds an entry to the HPT, possibly bolting it 14662306a36Sopenharmony_ci */ 14762306a36Sopenharmony_ciint hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci pgd_t *pgdp; 15062306a36Sopenharmony_ci p4d_t *p4dp; 15162306a36Sopenharmony_ci pud_t *pudp; 15262306a36Sopenharmony_ci pmd_t *pmdp; 15362306a36Sopenharmony_ci pte_t *ptep; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); 15662306a36Sopenharmony_ci if (slab_is_available()) { 15762306a36Sopenharmony_ci pgdp = pgd_offset_k(ea); 15862306a36Sopenharmony_ci p4dp = p4d_offset(pgdp, ea); 15962306a36Sopenharmony_ci pudp = pud_alloc(&init_mm, p4dp, ea); 16062306a36Sopenharmony_ci if (!pudp) 16162306a36Sopenharmony_ci return -ENOMEM; 16262306a36Sopenharmony_ci pmdp = pmd_alloc(&init_mm, pudp, ea); 16362306a36Sopenharmony_ci if (!pmdp) 16462306a36Sopenharmony_ci return -ENOMEM; 16562306a36Sopenharmony_ci ptep = pte_alloc_kernel(pmdp, ea); 16662306a36Sopenharmony_ci if (!ptep) 16762306a36Sopenharmony_ci return -ENOMEM; 16862306a36Sopenharmony_ci set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); 16962306a36Sopenharmony_ci } else { 17062306a36Sopenharmony_ci /* 17162306a36Sopenharmony_ci * If the mm subsystem is not fully up, we cannot create a 17262306a36Sopenharmony_ci * linux page table entry for this mapping. Simply bolt an 17362306a36Sopenharmony_ci * entry in the hardware page table. 17462306a36Sopenharmony_ci * 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_ci if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot), 17762306a36Sopenharmony_ci mmu_io_psize, mmu_kernel_ssize)) { 17862306a36Sopenharmony_ci printk(KERN_ERR "Failed to do bolted mapping IO " 17962306a36Sopenharmony_ci "memory at %016lx !\n", pa); 18062306a36Sopenharmony_ci return -ENOMEM; 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci } 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci smp_wmb(); 18562306a36Sopenharmony_ci return 0; 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ciunsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, 19162306a36Sopenharmony_ci pmd_t *pmdp, unsigned long clr, 19262306a36Sopenharmony_ci unsigned long set) 19362306a36Sopenharmony_ci{ 19462306a36Sopenharmony_ci __be64 old_be, tmp; 19562306a36Sopenharmony_ci unsigned long old; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM 19862306a36Sopenharmony_ci WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); 19962306a36Sopenharmony_ci assert_spin_locked(pmd_lockptr(mm, pmdp)); 20062306a36Sopenharmony_ci#endif 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci __asm__ __volatile__( 20362306a36Sopenharmony_ci "1: ldarx %0,0,%3\n\ 20462306a36Sopenharmony_ci and. %1,%0,%6\n\ 20562306a36Sopenharmony_ci bne- 1b \n\ 20662306a36Sopenharmony_ci andc %1,%0,%4 \n\ 20762306a36Sopenharmony_ci or %1,%1,%7\n\ 20862306a36Sopenharmony_ci stdcx. %1,0,%3 \n\ 20962306a36Sopenharmony_ci bne- 1b" 21062306a36Sopenharmony_ci : "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp) 21162306a36Sopenharmony_ci : "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp), 21262306a36Sopenharmony_ci "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set)) 21362306a36Sopenharmony_ci : "cc" ); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci old = be64_to_cpu(old_be); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci trace_hugepage_update_pmd(addr, old, clr, set); 21862306a36Sopenharmony_ci if (old & H_PAGE_HASHPTE) 21962306a36Sopenharmony_ci hpte_do_hugepage_flush(mm, addr, pmdp, old); 22062306a36Sopenharmony_ci return old; 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cipmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, 22462306a36Sopenharmony_ci pmd_t *pmdp) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci pmd_t pmd; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci VM_BUG_ON(address & ~HPAGE_PMD_MASK); 22962306a36Sopenharmony_ci VM_BUG_ON(pmd_trans_huge(*pmdp)); 23062306a36Sopenharmony_ci VM_BUG_ON(pmd_devmap(*pmdp)); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci pmd = *pmdp; 23362306a36Sopenharmony_ci pmd_clear(pmdp); 23462306a36Sopenharmony_ci /* 23562306a36Sopenharmony_ci * Wait for all pending hash_page to finish. This is needed 23662306a36Sopenharmony_ci * in case of subpage collapse. When we collapse normal pages 23762306a36Sopenharmony_ci * to hugepage, we first clear the pmd, then invalidate all 23862306a36Sopenharmony_ci * the PTE entries. The assumption here is that any low level 23962306a36Sopenharmony_ci * page fault will see a none pmd and take the slow path that 24062306a36Sopenharmony_ci * will wait on mmap_lock. But we could very well be in a 24162306a36Sopenharmony_ci * hash_page with local ptep pointer value. Such a hash page 24262306a36Sopenharmony_ci * can result in adding new HPTE entries for normal subpages. 24362306a36Sopenharmony_ci * That means we could be modifying the page content as we 24462306a36Sopenharmony_ci * copy them to a huge page. So wait for parallel hash_page 24562306a36Sopenharmony_ci * to finish before invalidating HPTE entries. We can do this 24662306a36Sopenharmony_ci * by sending an IPI to all the cpus and executing a dummy 24762306a36Sopenharmony_ci * function there. 24862306a36Sopenharmony_ci */ 24962306a36Sopenharmony_ci serialize_against_pte_lookup(vma->vm_mm); 25062306a36Sopenharmony_ci /* 25162306a36Sopenharmony_ci * Now invalidate the hpte entries in the range 25262306a36Sopenharmony_ci * covered by pmd. This make sure we take a 25362306a36Sopenharmony_ci * fault and will find the pmd as none, which will 25462306a36Sopenharmony_ci * result in a major fault which takes mmap_lock and 25562306a36Sopenharmony_ci * hence wait for collapse to complete. Without this 25662306a36Sopenharmony_ci * the __collapse_huge_page_copy can result in copying 25762306a36Sopenharmony_ci * the old content. 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_ci flush_hash_table_pmd_range(vma->vm_mm, &pmd, address); 26062306a36Sopenharmony_ci return pmd; 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci/* 26462306a36Sopenharmony_ci * We want to put the pgtable in pmd and use pgtable for tracking 26562306a36Sopenharmony_ci * the base page size hptes 26662306a36Sopenharmony_ci */ 26762306a36Sopenharmony_civoid hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 26862306a36Sopenharmony_ci pgtable_t pgtable) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci pgtable_t *pgtable_slot; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci assert_spin_locked(pmd_lockptr(mm, pmdp)); 27362306a36Sopenharmony_ci /* 27462306a36Sopenharmony_ci * we store the pgtable in the second half of PMD 27562306a36Sopenharmony_ci */ 27662306a36Sopenharmony_ci pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; 27762306a36Sopenharmony_ci *pgtable_slot = pgtable; 27862306a36Sopenharmony_ci /* 27962306a36Sopenharmony_ci * expose the deposited pgtable to other cpus. 28062306a36Sopenharmony_ci * before we set the hugepage PTE at pmd level 28162306a36Sopenharmony_ci * hash fault code looks at the deposted pgtable 28262306a36Sopenharmony_ci * to store hash index values. 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_ci smp_wmb(); 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cipgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci pgtable_t pgtable; 29062306a36Sopenharmony_ci pgtable_t *pgtable_slot; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci assert_spin_locked(pmd_lockptr(mm, pmdp)); 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; 29562306a36Sopenharmony_ci pgtable = *pgtable_slot; 29662306a36Sopenharmony_ci /* 29762306a36Sopenharmony_ci * Once we withdraw, mark the entry NULL. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci *pgtable_slot = NULL; 30062306a36Sopenharmony_ci /* 30162306a36Sopenharmony_ci * We store HPTE information in the deposited PTE fragment. 30262306a36Sopenharmony_ci * zero out the content on withdraw. 30362306a36Sopenharmony_ci */ 30462306a36Sopenharmony_ci memset(pgtable, 0, PTE_FRAG_SIZE); 30562306a36Sopenharmony_ci return pgtable; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci/* 30962306a36Sopenharmony_ci * A linux hugepage PMD was changed and the corresponding hash table entries 31062306a36Sopenharmony_ci * neesd to be flushed. 31162306a36Sopenharmony_ci */ 31262306a36Sopenharmony_civoid hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, 31362306a36Sopenharmony_ci pmd_t *pmdp, unsigned long old_pmd) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci int ssize; 31662306a36Sopenharmony_ci unsigned int psize; 31762306a36Sopenharmony_ci unsigned long vsid; 31862306a36Sopenharmony_ci unsigned long flags = 0; 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci /* get the base page size,vsid and segment size */ 32162306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM 32262306a36Sopenharmony_ci psize = get_slice_psize(mm, addr); 32362306a36Sopenharmony_ci BUG_ON(psize == MMU_PAGE_16M); 32462306a36Sopenharmony_ci#endif 32562306a36Sopenharmony_ci if (old_pmd & H_PAGE_COMBO) 32662306a36Sopenharmony_ci psize = MMU_PAGE_4K; 32762306a36Sopenharmony_ci else 32862306a36Sopenharmony_ci psize = MMU_PAGE_64K; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci if (!is_kernel_addr(addr)) { 33162306a36Sopenharmony_ci ssize = user_segment_size(addr); 33262306a36Sopenharmony_ci vsid = get_user_vsid(&mm->context, addr, ssize); 33362306a36Sopenharmony_ci WARN_ON(vsid == 0); 33462306a36Sopenharmony_ci } else { 33562306a36Sopenharmony_ci vsid = get_kernel_vsid(addr, mmu_kernel_ssize); 33662306a36Sopenharmony_ci ssize = mmu_kernel_ssize; 33762306a36Sopenharmony_ci } 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci if (mm_is_thread_local(mm)) 34062306a36Sopenharmony_ci flags |= HPTE_LOCAL_UPDATE; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); 34362306a36Sopenharmony_ci} 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_cipmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, 34662306a36Sopenharmony_ci unsigned long addr, pmd_t *pmdp) 34762306a36Sopenharmony_ci{ 34862306a36Sopenharmony_ci pmd_t old_pmd; 34962306a36Sopenharmony_ci pgtable_t pgtable; 35062306a36Sopenharmony_ci unsigned long old; 35162306a36Sopenharmony_ci pgtable_t *pgtable_slot; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0); 35462306a36Sopenharmony_ci old_pmd = __pmd(old); 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * We have pmd == none and we are holding page_table_lock. 35762306a36Sopenharmony_ci * So we can safely go and clear the pgtable hash 35862306a36Sopenharmony_ci * index info. 35962306a36Sopenharmony_ci */ 36062306a36Sopenharmony_ci pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; 36162306a36Sopenharmony_ci pgtable = *pgtable_slot; 36262306a36Sopenharmony_ci /* 36362306a36Sopenharmony_ci * Let's zero out old valid and hash index details 36462306a36Sopenharmony_ci * hash fault look at them. 36562306a36Sopenharmony_ci */ 36662306a36Sopenharmony_ci memset(pgtable, 0, PTE_FRAG_SIZE); 36762306a36Sopenharmony_ci return old_pmd; 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ciint hash__has_transparent_hugepage(void) 37162306a36Sopenharmony_ci{ 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci if (!mmu_has_feature(MMU_FTR_16M_PAGE)) 37462306a36Sopenharmony_ci return 0; 37562306a36Sopenharmony_ci /* 37662306a36Sopenharmony_ci * We support THP only if PMD_SIZE is 16MB. 37762306a36Sopenharmony_ci */ 37862306a36Sopenharmony_ci if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT) 37962306a36Sopenharmony_ci return 0; 38062306a36Sopenharmony_ci /* 38162306a36Sopenharmony_ci * We need to make sure that we support 16MB hugepage in a segment 38262306a36Sopenharmony_ci * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE 38362306a36Sopenharmony_ci * of 64K. 38462306a36Sopenharmony_ci */ 38562306a36Sopenharmony_ci /* 38662306a36Sopenharmony_ci * If we have 64K HPTE, we will be using that by default 38762306a36Sopenharmony_ci */ 38862306a36Sopenharmony_ci if (mmu_psize_defs[MMU_PAGE_64K].shift && 38962306a36Sopenharmony_ci (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1)) 39062306a36Sopenharmony_ci return 0; 39162306a36Sopenharmony_ci /* 39262306a36Sopenharmony_ci * Ok we only have 4K HPTE 39362306a36Sopenharmony_ci */ 39462306a36Sopenharmony_ci if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1) 39562306a36Sopenharmony_ci return 0; 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci return 1; 39862306a36Sopenharmony_ci} 39962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci#ifdef CONFIG_STRICT_KERNEL_RWX 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_cistruct change_memory_parms { 40662306a36Sopenharmony_ci unsigned long start, end, newpp; 40762306a36Sopenharmony_ci unsigned int step, nr_cpus; 40862306a36Sopenharmony_ci atomic_t master_cpu; 40962306a36Sopenharmony_ci atomic_t cpu_counter; 41062306a36Sopenharmony_ci}; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci// We'd rather this was on the stack but it has to be in the RMO 41362306a36Sopenharmony_cistatic struct change_memory_parms chmem_parms; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci// And therefore we need a lock to protect it from concurrent use 41662306a36Sopenharmony_cistatic DEFINE_MUTEX(chmem_lock); 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic void change_memory_range(unsigned long start, unsigned long end, 41962306a36Sopenharmony_ci unsigned int step, unsigned long newpp) 42062306a36Sopenharmony_ci{ 42162306a36Sopenharmony_ci unsigned long idx; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", 42462306a36Sopenharmony_ci start, end, newpp, step); 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci for (idx = start; idx < end; idx += step) 42762306a36Sopenharmony_ci /* Not sure if we can do much with the return value */ 42862306a36Sopenharmony_ci mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, 42962306a36Sopenharmony_ci mmu_kernel_ssize); 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cistatic int notrace chmem_secondary_loop(struct change_memory_parms *parms) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci unsigned long msr, tmp, flags; 43562306a36Sopenharmony_ci int *p; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci p = &parms->cpu_counter.counter; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci local_irq_save(flags); 44062306a36Sopenharmony_ci hard_irq_disable(); 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci asm volatile ( 44362306a36Sopenharmony_ci // Switch to real mode and leave interrupts off 44462306a36Sopenharmony_ci "mfmsr %[msr] ;" 44562306a36Sopenharmony_ci "li %[tmp], %[MSR_IR_DR] ;" 44662306a36Sopenharmony_ci "andc %[tmp], %[msr], %[tmp] ;" 44762306a36Sopenharmony_ci "mtmsrd %[tmp] ;" 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci // Tell the master we are in real mode 45062306a36Sopenharmony_ci "1: " 45162306a36Sopenharmony_ci "lwarx %[tmp], 0, %[p] ;" 45262306a36Sopenharmony_ci "addic %[tmp], %[tmp], -1 ;" 45362306a36Sopenharmony_ci "stwcx. %[tmp], 0, %[p] ;" 45462306a36Sopenharmony_ci "bne- 1b ;" 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci // Spin until the counter goes to zero 45762306a36Sopenharmony_ci "2: ;" 45862306a36Sopenharmony_ci "lwz %[tmp], 0(%[p]) ;" 45962306a36Sopenharmony_ci "cmpwi %[tmp], 0 ;" 46062306a36Sopenharmony_ci "bne- 2b ;" 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci // Switch back to virtual mode 46362306a36Sopenharmony_ci "mtmsrd %[msr] ;" 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci : // outputs 46662306a36Sopenharmony_ci [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) 46762306a36Sopenharmony_ci : // inputs 46862306a36Sopenharmony_ci [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) 46962306a36Sopenharmony_ci : // clobbers 47062306a36Sopenharmony_ci "cc", "xer" 47162306a36Sopenharmony_ci ); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci local_irq_restore(flags); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci return 0; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic int change_memory_range_fn(void *data) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci struct change_memory_parms *parms = data; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci // First CPU goes through, all others wait. 48362306a36Sopenharmony_ci if (atomic_xchg(&parms->master_cpu, 1) == 1) 48462306a36Sopenharmony_ci return chmem_secondary_loop(parms); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci // Wait for all but one CPU (this one) to call-in 48762306a36Sopenharmony_ci while (atomic_read(&parms->cpu_counter) > 1) 48862306a36Sopenharmony_ci barrier(); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci change_memory_range(parms->start, parms->end, parms->step, parms->newpp); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci mb(); 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci // Signal the other CPUs that we're done 49562306a36Sopenharmony_ci atomic_dec(&parms->cpu_counter); 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci return 0; 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_cistatic bool hash__change_memory_range(unsigned long start, unsigned long end, 50162306a36Sopenharmony_ci unsigned long newpp) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci unsigned int step, shift; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci shift = mmu_psize_defs[mmu_linear_psize].shift; 50662306a36Sopenharmony_ci step = 1 << shift; 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci start = ALIGN_DOWN(start, step); 50962306a36Sopenharmony_ci end = ALIGN(end, step); // aligns up 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci if (start >= end) 51262306a36Sopenharmony_ci return false; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci if (firmware_has_feature(FW_FEATURE_LPAR)) { 51562306a36Sopenharmony_ci mutex_lock(&chmem_lock); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci chmem_parms.start = start; 51862306a36Sopenharmony_ci chmem_parms.end = end; 51962306a36Sopenharmony_ci chmem_parms.step = step; 52062306a36Sopenharmony_ci chmem_parms.newpp = newpp; 52162306a36Sopenharmony_ci atomic_set(&chmem_parms.master_cpu, 0); 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci cpus_read_lock(); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci // Ensure state is consistent before we call the other CPUs 52862306a36Sopenharmony_ci mb(); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, 53162306a36Sopenharmony_ci cpu_online_mask); 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci cpus_read_unlock(); 53462306a36Sopenharmony_ci mutex_unlock(&chmem_lock); 53562306a36Sopenharmony_ci } else 53662306a36Sopenharmony_ci change_memory_range(start, end, step, newpp); 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci return true; 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_civoid hash__mark_rodata_ro(void) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci unsigned long start, end, pp; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci start = (unsigned long)_stext; 54662306a36Sopenharmony_ci end = (unsigned long)__end_rodata; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci WARN_ON(!hash__change_memory_range(start, end, pp)); 55162306a36Sopenharmony_ci} 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_civoid hash__mark_initmem_nx(void) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci unsigned long start, end, pp; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci start = (unsigned long)__init_begin; 55862306a36Sopenharmony_ci end = (unsigned long)__init_end; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci WARN_ON(!hash__change_memory_range(start, end, pp)); 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci#endif 565