1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright IBM Corp. 2007, 2011 4 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 5 */ 6 7#include <linux/sched.h> 8#include <linux/kernel.h> 9#include <linux/errno.h> 10#include <linux/gfp.h> 11#include <linux/mm.h> 12#include <linux/swap.h> 13#include <linux/smp.h> 14#include <linux/spinlock.h> 15#include <linux/rcupdate.h> 16#include <linux/slab.h> 17#include <linux/swapops.h> 18#include <linux/sysctl.h> 19#include <linux/ksm.h> 20#include <linux/mman.h> 21 22#include <asm/tlb.h> 23#include <asm/tlbflush.h> 24#include <asm/mmu_context.h> 25#include <asm/page-states.h> 26 27pgprot_t pgprot_writecombine(pgprot_t prot) 28{ 29 /* 30 * mio_wb_bit_mask may be set on a different CPU, but it is only set 31 * once at init and only read afterwards. 32 */ 33 return __pgprot(pgprot_val(prot) | mio_wb_bit_mask); 34} 35EXPORT_SYMBOL_GPL(pgprot_writecombine); 36 37pgprot_t pgprot_writethrough(pgprot_t prot) 38{ 39 /* 40 * mio_wb_bit_mask may be set on a different CPU, but it is only set 41 * once at init and only read afterwards. 42 */ 43 return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); 44} 45EXPORT_SYMBOL_GPL(pgprot_writethrough); 46 47static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, 48 pte_t *ptep, int nodat) 49{ 50 unsigned long opt, asce; 51 52 if (MACHINE_HAS_TLB_GUEST) { 53 opt = 0; 54 asce = READ_ONCE(mm->context.gmap_asce); 55 if (asce == 0UL || nodat) 56 opt |= IPTE_NODAT; 57 if (asce != -1UL) { 58 asce = asce ? : mm->context.asce; 59 opt |= IPTE_GUEST_ASCE; 60 } 61 __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); 62 } else { 63 __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); 64 } 65} 66 67static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, 68 pte_t *ptep, int nodat) 69{ 70 unsigned long opt, asce; 71 72 if (MACHINE_HAS_TLB_GUEST) { 73 opt = 0; 74 asce = READ_ONCE(mm->context.gmap_asce); 75 if (asce == 0UL || nodat) 76 opt |= IPTE_NODAT; 77 if (asce != -1UL) { 78 asce = asce ? : mm->context.asce; 79 opt |= IPTE_GUEST_ASCE; 80 } 81 __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); 82 } else { 83 __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 84 } 85} 86 87static inline pte_t ptep_flush_direct(struct mm_struct *mm, 88 unsigned long addr, pte_t *ptep, 89 int nodat) 90{ 91 pte_t old; 92 93 old = *ptep; 94 if (unlikely(pte_val(old) & _PAGE_INVALID)) 95 return old; 96 atomic_inc(&mm->context.flush_count); 97 if (MACHINE_HAS_TLB_LC && 98 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 99 ptep_ipte_local(mm, addr, ptep, nodat); 100 else 101 ptep_ipte_global(mm, addr, ptep, nodat); 102 atomic_dec(&mm->context.flush_count); 103 return old; 104} 105 106static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 107 unsigned long addr, pte_t *ptep, 108 int nodat) 109{ 110 pte_t old; 111 112 old = *ptep; 113 if (unlikely(pte_val(old) & _PAGE_INVALID)) 114 return old; 115 atomic_inc(&mm->context.flush_count); 116 if (cpumask_equal(&mm->context.cpu_attach_mask, 117 cpumask_of(smp_processor_id()))) { 118 pte_val(*ptep) |= _PAGE_INVALID; 119 mm->context.flush_mm = 1; 120 } else 121 ptep_ipte_global(mm, addr, ptep, nodat); 122 atomic_dec(&mm->context.flush_count); 123 return old; 124} 125 126static inline pgste_t pgste_get_lock(pte_t *ptep) 127{ 128 unsigned long new = 0; 129#ifdef CONFIG_PGSTE 130 unsigned long old; 131 132 asm( 133 " lg %0,%2\n" 134 "0: lgr %1,%0\n" 135 " nihh %0,0xff7f\n" /* clear PCL bit in old */ 136 " oihh %1,0x0080\n" /* set PCL bit in new */ 137 " csg %0,%1,%2\n" 138 " jl 0b\n" 139 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) 140 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); 141#endif 142 return __pgste(new); 143} 144 145static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) 146{ 147#ifdef CONFIG_PGSTE 148 asm( 149 " nihh %1,0xff7f\n" /* clear PCL bit */ 150 " stg %1,%0\n" 151 : "=Q" (ptep[PTRS_PER_PTE]) 152 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) 153 : "cc", "memory"); 154#endif 155} 156 157static inline pgste_t pgste_get(pte_t *ptep) 158{ 159 unsigned long pgste = 0; 160#ifdef CONFIG_PGSTE 161 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); 162#endif 163 return __pgste(pgste); 164} 165 166static inline void pgste_set(pte_t *ptep, pgste_t pgste) 167{ 168#ifdef CONFIG_PGSTE 169 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; 170#endif 171} 172 173static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 174 struct mm_struct *mm) 175{ 176#ifdef CONFIG_PGSTE 177 unsigned long address, bits, skey; 178 179 if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID) 180 return pgste; 181 address = pte_val(pte) & PAGE_MASK; 182 skey = (unsigned long) page_get_storage_key(address); 183 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 184 /* Transfer page changed & referenced bit to guest bits in pgste */ 185 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 186 /* Copy page access key and fetch protection bit to pgste */ 187 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 188 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 189#endif 190 return pgste; 191 192} 193 194static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, 195 struct mm_struct *mm) 196{ 197#ifdef CONFIG_PGSTE 198 unsigned long address; 199 unsigned long nkey; 200 201 if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID) 202 return; 203 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); 204 address = pte_val(entry) & PAGE_MASK; 205 /* 206 * Set page access key and fetch protection bit from pgste. 207 * The guest C/R information is still in the PGSTE, set real 208 * key C/R to 0. 209 */ 210 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 211 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 212 page_set_storage_key(address, nkey, 0); 213#endif 214} 215 216static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) 217{ 218#ifdef CONFIG_PGSTE 219 if ((pte_val(entry) & _PAGE_PRESENT) && 220 (pte_val(entry) & _PAGE_WRITE) && 221 !(pte_val(entry) & _PAGE_INVALID)) { 222 if (!MACHINE_HAS_ESOP) { 223 /* 224 * Without enhanced suppression-on-protection force 225 * the dirty bit on for all writable ptes. 226 */ 227 pte_val(entry) |= _PAGE_DIRTY; 228 pte_val(entry) &= ~_PAGE_PROTECT; 229 } 230 if (!(pte_val(entry) & _PAGE_PROTECT)) 231 /* This pte allows write access, set user-dirty */ 232 pgste_val(pgste) |= PGSTE_UC_BIT; 233 } 234#endif 235 *ptep = entry; 236 return pgste; 237} 238 239static inline pgste_t pgste_pte_notify(struct mm_struct *mm, 240 unsigned long addr, 241 pte_t *ptep, pgste_t pgste) 242{ 243#ifdef CONFIG_PGSTE 244 unsigned long bits; 245 246 bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); 247 if (bits) { 248 pgste_val(pgste) ^= bits; 249 ptep_notify(mm, addr, ptep, bits); 250 } 251#endif 252 return pgste; 253} 254 255static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 256 unsigned long addr, pte_t *ptep) 257{ 258 pgste_t pgste = __pgste(0); 259 260 if (mm_has_pgste(mm)) { 261 pgste = pgste_get_lock(ptep); 262 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 263 } 264 return pgste; 265} 266 267static inline pte_t ptep_xchg_commit(struct mm_struct *mm, 268 unsigned long addr, pte_t *ptep, 269 pgste_t pgste, pte_t old, pte_t new) 270{ 271 if (mm_has_pgste(mm)) { 272 if (pte_val(old) & _PAGE_INVALID) 273 pgste_set_key(ptep, pgste, new, mm); 274 if (pte_val(new) & _PAGE_INVALID) { 275 pgste = pgste_update_all(old, pgste, mm); 276 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == 277 _PGSTE_GPS_USAGE_UNUSED) 278 pte_val(old) |= _PAGE_UNUSED; 279 } 280 pgste = pgste_set_pte(ptep, pgste, new); 281 pgste_set_unlock(ptep, pgste); 282 } else { 283 *ptep = new; 284 } 285 return old; 286} 287 288pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, 289 pte_t *ptep, pte_t new) 290{ 291 pgste_t pgste; 292 pte_t old; 293 int nodat; 294 295 preempt_disable(); 296 pgste = ptep_xchg_start(mm, addr, ptep); 297 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 298 old = ptep_flush_direct(mm, addr, ptep, nodat); 299 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 300 preempt_enable(); 301 return old; 302} 303EXPORT_SYMBOL(ptep_xchg_direct); 304 305pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, 306 pte_t *ptep, pte_t new) 307{ 308 pgste_t pgste; 309 pte_t old; 310 int nodat; 311 312 preempt_disable(); 313 pgste = ptep_xchg_start(mm, addr, ptep); 314 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 315 old = ptep_flush_lazy(mm, addr, ptep, nodat); 316 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 317 preempt_enable(); 318 return old; 319} 320EXPORT_SYMBOL(ptep_xchg_lazy); 321 322pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, 323 pte_t *ptep) 324{ 325 pgste_t pgste; 326 pte_t old; 327 int nodat; 328 struct mm_struct *mm = vma->vm_mm; 329 330 preempt_disable(); 331 pgste = ptep_xchg_start(mm, addr, ptep); 332 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 333 old = ptep_flush_lazy(mm, addr, ptep, nodat); 334 if (mm_has_pgste(mm)) { 335 pgste = pgste_update_all(old, pgste, mm); 336 pgste_set(ptep, pgste); 337 } 338 return old; 339} 340 341void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, 342 pte_t *ptep, pte_t old_pte, pte_t pte) 343{ 344 pgste_t pgste; 345 struct mm_struct *mm = vma->vm_mm; 346 347 if (!MACHINE_HAS_NX) 348 pte_val(pte) &= ~_PAGE_NOEXEC; 349 if (mm_has_pgste(mm)) { 350 pgste = pgste_get(ptep); 351 pgste_set_key(ptep, pgste, pte, mm); 352 pgste = pgste_set_pte(ptep, pgste, pte); 353 pgste_set_unlock(ptep, pgste); 354 } else { 355 *ptep = pte; 356 } 357 preempt_enable(); 358} 359 360static inline void pmdp_idte_local(struct mm_struct *mm, 361 unsigned long addr, pmd_t *pmdp) 362{ 363 if (MACHINE_HAS_TLB_GUEST) 364 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 365 mm->context.asce, IDTE_LOCAL); 366 else 367 __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); 368 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 369 gmap_pmdp_idte_local(mm, addr); 370} 371 372static inline void pmdp_idte_global(struct mm_struct *mm, 373 unsigned long addr, pmd_t *pmdp) 374{ 375 if (MACHINE_HAS_TLB_GUEST) { 376 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 377 mm->context.asce, IDTE_GLOBAL); 378 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 379 gmap_pmdp_idte_global(mm, addr); 380 } else if (MACHINE_HAS_IDTE) { 381 __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); 382 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 383 gmap_pmdp_idte_global(mm, addr); 384 } else { 385 __pmdp_csp(pmdp); 386 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 387 gmap_pmdp_csp(mm, addr); 388 } 389} 390 391static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 392 unsigned long addr, pmd_t *pmdp) 393{ 394 pmd_t old; 395 396 old = *pmdp; 397 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 398 return old; 399 atomic_inc(&mm->context.flush_count); 400 if (MACHINE_HAS_TLB_LC && 401 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 402 pmdp_idte_local(mm, addr, pmdp); 403 else 404 pmdp_idte_global(mm, addr, pmdp); 405 atomic_dec(&mm->context.flush_count); 406 return old; 407} 408 409static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, 410 unsigned long addr, pmd_t *pmdp) 411{ 412 pmd_t old; 413 414 old = *pmdp; 415 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 416 return old; 417 atomic_inc(&mm->context.flush_count); 418 if (cpumask_equal(&mm->context.cpu_attach_mask, 419 cpumask_of(smp_processor_id()))) { 420 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 421 mm->context.flush_mm = 1; 422 if (mm_has_pgste(mm)) 423 gmap_pmdp_invalidate(mm, addr); 424 } else { 425 pmdp_idte_global(mm, addr, pmdp); 426 } 427 atomic_dec(&mm->context.flush_count); 428 return old; 429} 430 431#ifdef CONFIG_PGSTE 432static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) 433{ 434 pgd_t *pgd; 435 p4d_t *p4d; 436 pud_t *pud; 437 pmd_t *pmd; 438 439 pgd = pgd_offset(mm, addr); 440 p4d = p4d_alloc(mm, pgd, addr); 441 if (!p4d) 442 return NULL; 443 pud = pud_alloc(mm, p4d, addr); 444 if (!pud) 445 return NULL; 446 pmd = pmd_alloc(mm, pud, addr); 447 return pmd; 448} 449#endif 450 451pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, 452 pmd_t *pmdp, pmd_t new) 453{ 454 pmd_t old; 455 456 preempt_disable(); 457 old = pmdp_flush_direct(mm, addr, pmdp); 458 *pmdp = new; 459 preempt_enable(); 460 return old; 461} 462EXPORT_SYMBOL(pmdp_xchg_direct); 463 464pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, 465 pmd_t *pmdp, pmd_t new) 466{ 467 pmd_t old; 468 469 preempt_disable(); 470 old = pmdp_flush_lazy(mm, addr, pmdp); 471 *pmdp = new; 472 preempt_enable(); 473 return old; 474} 475EXPORT_SYMBOL(pmdp_xchg_lazy); 476 477static inline void pudp_idte_local(struct mm_struct *mm, 478 unsigned long addr, pud_t *pudp) 479{ 480 if (MACHINE_HAS_TLB_GUEST) 481 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 482 mm->context.asce, IDTE_LOCAL); 483 else 484 __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); 485} 486 487static inline void pudp_idte_global(struct mm_struct *mm, 488 unsigned long addr, pud_t *pudp) 489{ 490 if (MACHINE_HAS_TLB_GUEST) 491 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 492 mm->context.asce, IDTE_GLOBAL); 493 else if (MACHINE_HAS_IDTE) 494 __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); 495 else 496 /* 497 * Invalid bit position is the same for pmd and pud, so we can 498 * re-use _pmd_csp() here 499 */ 500 __pmdp_csp((pmd_t *) pudp); 501} 502 503static inline pud_t pudp_flush_direct(struct mm_struct *mm, 504 unsigned long addr, pud_t *pudp) 505{ 506 pud_t old; 507 508 old = *pudp; 509 if (pud_val(old) & _REGION_ENTRY_INVALID) 510 return old; 511 atomic_inc(&mm->context.flush_count); 512 if (MACHINE_HAS_TLB_LC && 513 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 514 pudp_idte_local(mm, addr, pudp); 515 else 516 pudp_idte_global(mm, addr, pudp); 517 atomic_dec(&mm->context.flush_count); 518 return old; 519} 520 521pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, 522 pud_t *pudp, pud_t new) 523{ 524 pud_t old; 525 526 preempt_disable(); 527 old = pudp_flush_direct(mm, addr, pudp); 528 *pudp = new; 529 preempt_enable(); 530 return old; 531} 532EXPORT_SYMBOL(pudp_xchg_direct); 533 534#ifdef CONFIG_TRANSPARENT_HUGEPAGE 535void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 536 pgtable_t pgtable) 537{ 538 struct list_head *lh = (struct list_head *) pgtable; 539 540 assert_spin_locked(pmd_lockptr(mm, pmdp)); 541 542 /* FIFO */ 543 if (!pmd_huge_pte(mm, pmdp)) 544 INIT_LIST_HEAD(lh); 545 else 546 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 547 pmd_huge_pte(mm, pmdp) = pgtable; 548} 549 550pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 551{ 552 struct list_head *lh; 553 pgtable_t pgtable; 554 pte_t *ptep; 555 556 assert_spin_locked(pmd_lockptr(mm, pmdp)); 557 558 /* FIFO */ 559 pgtable = pmd_huge_pte(mm, pmdp); 560 lh = (struct list_head *) pgtable; 561 if (list_empty(lh)) 562 pmd_huge_pte(mm, pmdp) = NULL; 563 else { 564 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; 565 list_del(lh); 566 } 567 ptep = (pte_t *) pgtable; 568 pte_val(*ptep) = _PAGE_INVALID; 569 ptep++; 570 pte_val(*ptep) = _PAGE_INVALID; 571 return pgtable; 572} 573#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 574 575#ifdef CONFIG_PGSTE 576void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, 577 pte_t *ptep, pte_t entry) 578{ 579 pgste_t pgste; 580 581 /* the mm_has_pgste() check is done in set_pte_at() */ 582 preempt_disable(); 583 pgste = pgste_get_lock(ptep); 584 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 585 pgste_set_key(ptep, pgste, entry, mm); 586 pgste = pgste_set_pte(ptep, pgste, entry); 587 pgste_set_unlock(ptep, pgste); 588 preempt_enable(); 589} 590 591void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 592{ 593 pgste_t pgste; 594 595 preempt_disable(); 596 pgste = pgste_get_lock(ptep); 597 pgste_val(pgste) |= PGSTE_IN_BIT; 598 pgste_set_unlock(ptep, pgste); 599 preempt_enable(); 600} 601 602/** 603 * ptep_force_prot - change access rights of a locked pte 604 * @mm: pointer to the process mm_struct 605 * @addr: virtual address in the guest address space 606 * @ptep: pointer to the page table entry 607 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE 608 * @bit: pgste bit to set (e.g. for notification) 609 * 610 * Returns 0 if the access rights were changed and -EAGAIN if the current 611 * and requested access rights are incompatible. 612 */ 613int ptep_force_prot(struct mm_struct *mm, unsigned long addr, 614 pte_t *ptep, int prot, unsigned long bit) 615{ 616 pte_t entry; 617 pgste_t pgste; 618 int pte_i, pte_p, nodat; 619 620 pgste = pgste_get_lock(ptep); 621 entry = *ptep; 622 /* Check pte entry after all locks have been acquired */ 623 pte_i = pte_val(entry) & _PAGE_INVALID; 624 pte_p = pte_val(entry) & _PAGE_PROTECT; 625 if ((pte_i && (prot != PROT_NONE)) || 626 (pte_p && (prot & PROT_WRITE))) { 627 pgste_set_unlock(ptep, pgste); 628 return -EAGAIN; 629 } 630 /* Change access rights and set pgste bit */ 631 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 632 if (prot == PROT_NONE && !pte_i) { 633 ptep_flush_direct(mm, addr, ptep, nodat); 634 pgste = pgste_update_all(entry, pgste, mm); 635 pte_val(entry) |= _PAGE_INVALID; 636 } 637 if (prot == PROT_READ && !pte_p) { 638 ptep_flush_direct(mm, addr, ptep, nodat); 639 pte_val(entry) &= ~_PAGE_INVALID; 640 pte_val(entry) |= _PAGE_PROTECT; 641 } 642 pgste_val(pgste) |= bit; 643 pgste = pgste_set_pte(ptep, pgste, entry); 644 pgste_set_unlock(ptep, pgste); 645 return 0; 646} 647 648int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, 649 pte_t *sptep, pte_t *tptep, pte_t pte) 650{ 651 pgste_t spgste, tpgste; 652 pte_t spte, tpte; 653 int rc = -EAGAIN; 654 655 if (!(pte_val(*tptep) & _PAGE_INVALID)) 656 return 0; /* already shadowed */ 657 spgste = pgste_get_lock(sptep); 658 spte = *sptep; 659 if (!(pte_val(spte) & _PAGE_INVALID) && 660 !((pte_val(spte) & _PAGE_PROTECT) && 661 !(pte_val(pte) & _PAGE_PROTECT))) { 662 pgste_val(spgste) |= PGSTE_VSIE_BIT; 663 tpgste = pgste_get_lock(tptep); 664 pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | 665 (pte_val(pte) & _PAGE_PROTECT); 666 /* don't touch the storage key - it belongs to parent pgste */ 667 tpgste = pgste_set_pte(tptep, tpgste, tpte); 668 pgste_set_unlock(tptep, tpgste); 669 rc = 1; 670 } 671 pgste_set_unlock(sptep, spgste); 672 return rc; 673} 674 675void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 676{ 677 pgste_t pgste; 678 int nodat; 679 680 pgste = pgste_get_lock(ptep); 681 /* notifier is called by the caller */ 682 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 683 ptep_flush_direct(mm, saddr, ptep, nodat); 684 /* don't touch the storage key - it belongs to parent pgste */ 685 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 686 pgste_set_unlock(ptep, pgste); 687} 688 689static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) 690{ 691 if (!non_swap_entry(entry)) 692 dec_mm_counter(mm, MM_SWAPENTS); 693 else if (is_migration_entry(entry)) { 694 struct page *page = migration_entry_to_page(entry); 695 696 dec_mm_counter(mm, mm_counter(page)); 697 } 698 free_swap_and_cache(entry); 699} 700 701void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, 702 pte_t *ptep, int reset) 703{ 704 unsigned long pgstev; 705 pgste_t pgste; 706 pte_t pte; 707 708 /* Zap unused and logically-zero pages */ 709 preempt_disable(); 710 pgste = pgste_get_lock(ptep); 711 pgstev = pgste_val(pgste); 712 pte = *ptep; 713 if (!reset && pte_swap(pte) && 714 ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || 715 (pgstev & _PGSTE_GPS_ZERO))) { 716 ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); 717 pte_clear(mm, addr, ptep); 718 } 719 if (reset) 720 pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); 721 pgste_set_unlock(ptep, pgste); 722 preempt_enable(); 723} 724 725void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 726{ 727 unsigned long ptev; 728 pgste_t pgste; 729 730 /* Clear storage key ACC and F, but set R/C */ 731 preempt_disable(); 732 pgste = pgste_get_lock(ptep); 733 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 734 pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; 735 ptev = pte_val(*ptep); 736 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 737 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); 738 pgste_set_unlock(ptep, pgste); 739 preempt_enable(); 740} 741 742/* 743 * Test and reset if a guest page is dirty 744 */ 745bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, 746 pte_t *ptep) 747{ 748 pgste_t pgste; 749 pte_t pte; 750 bool dirty; 751 int nodat; 752 753 pgste = pgste_get_lock(ptep); 754 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 755 pgste_val(pgste) &= ~PGSTE_UC_BIT; 756 pte = *ptep; 757 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 758 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 759 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 760 ptep_ipte_global(mm, addr, ptep, nodat); 761 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 762 pte_val(pte) |= _PAGE_PROTECT; 763 else 764 pte_val(pte) |= _PAGE_INVALID; 765 *ptep = pte; 766 } 767 pgste_set_unlock(ptep, pgste); 768 return dirty; 769} 770EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); 771 772int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 773 unsigned char key, bool nq) 774{ 775 unsigned long keyul, paddr; 776 spinlock_t *ptl; 777 pgste_t old, new; 778 pmd_t *pmdp; 779 pte_t *ptep; 780 781 pmdp = pmd_alloc_map(mm, addr); 782 if (unlikely(!pmdp)) 783 return -EFAULT; 784 785 ptl = pmd_lock(mm, pmdp); 786 if (!pmd_present(*pmdp)) { 787 spin_unlock(ptl); 788 return -EFAULT; 789 } 790 791 if (pmd_large(*pmdp)) { 792 paddr = pmd_val(*pmdp) & HPAGE_MASK; 793 paddr |= addr & ~HPAGE_MASK; 794 /* 795 * Huge pmds need quiescing operations, they are 796 * always mapped. 797 */ 798 page_set_storage_key(paddr, key, 1); 799 spin_unlock(ptl); 800 return 0; 801 } 802 spin_unlock(ptl); 803 804 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 805 if (unlikely(!ptep)) 806 return -EFAULT; 807 808 new = old = pgste_get_lock(ptep); 809 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 810 PGSTE_ACC_BITS | PGSTE_FP_BIT); 811 keyul = (unsigned long) key; 812 pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 813 pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 814 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 815 unsigned long bits, skey; 816 817 paddr = pte_val(*ptep) & PAGE_MASK; 818 skey = (unsigned long) page_get_storage_key(paddr); 819 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 820 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); 821 /* Set storage key ACC and FP */ 822 page_set_storage_key(paddr, skey, !nq); 823 /* Merge host changed & referenced into pgste */ 824 pgste_val(new) |= bits << 52; 825 } 826 /* changing the guest storage key is considered a change of the page */ 827 if ((pgste_val(new) ^ pgste_val(old)) & 828 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 829 pgste_val(new) |= PGSTE_UC_BIT; 830 831 pgste_set_unlock(ptep, new); 832 pte_unmap_unlock(ptep, ptl); 833 return 0; 834} 835EXPORT_SYMBOL(set_guest_storage_key); 836 837/** 838 * Conditionally set a guest storage key (handling csske). 839 * oldkey will be updated when either mr or mc is set and a pointer is given. 840 * 841 * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest 842 * storage key was updated and -EFAULT on access errors. 843 */ 844int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 845 unsigned char key, unsigned char *oldkey, 846 bool nq, bool mr, bool mc) 847{ 848 unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; 849 int rc; 850 851 /* we can drop the pgste lock between getting and setting the key */ 852 if (mr | mc) { 853 rc = get_guest_storage_key(current->mm, addr, &tmp); 854 if (rc) 855 return rc; 856 if (oldkey) 857 *oldkey = tmp; 858 if (!mr) 859 mask |= _PAGE_REFERENCED; 860 if (!mc) 861 mask |= _PAGE_CHANGED; 862 if (!((tmp ^ key) & mask)) 863 return 0; 864 } 865 rc = set_guest_storage_key(current->mm, addr, key, nq); 866 return rc < 0 ? rc : 1; 867} 868EXPORT_SYMBOL(cond_set_guest_storage_key); 869 870/** 871 * Reset a guest reference bit (rrbe), returning the reference and changed bit. 872 * 873 * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 874 */ 875int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) 876{ 877 spinlock_t *ptl; 878 unsigned long paddr; 879 pgste_t old, new; 880 pmd_t *pmdp; 881 pte_t *ptep; 882 int cc = 0; 883 884 pmdp = pmd_alloc_map(mm, addr); 885 if (unlikely(!pmdp)) 886 return -EFAULT; 887 888 ptl = pmd_lock(mm, pmdp); 889 if (!pmd_present(*pmdp)) { 890 spin_unlock(ptl); 891 return -EFAULT; 892 } 893 894 if (pmd_large(*pmdp)) { 895 paddr = pmd_val(*pmdp) & HPAGE_MASK; 896 paddr |= addr & ~HPAGE_MASK; 897 cc = page_reset_referenced(paddr); 898 spin_unlock(ptl); 899 return cc; 900 } 901 spin_unlock(ptl); 902 903 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 904 if (unlikely(!ptep)) 905 return -EFAULT; 906 907 new = old = pgste_get_lock(ptep); 908 /* Reset guest reference bit only */ 909 pgste_val(new) &= ~PGSTE_GR_BIT; 910 911 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 912 paddr = pte_val(*ptep) & PAGE_MASK; 913 cc = page_reset_referenced(paddr); 914 /* Merge real referenced bit into host-set */ 915 pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; 916 } 917 /* Reflect guest's logical view, not physical */ 918 cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; 919 /* Changing the guest storage key is considered a change of the page */ 920 if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) 921 pgste_val(new) |= PGSTE_UC_BIT; 922 923 pgste_set_unlock(ptep, new); 924 pte_unmap_unlock(ptep, ptl); 925 return cc; 926} 927EXPORT_SYMBOL(reset_guest_reference_bit); 928 929int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 930 unsigned char *key) 931{ 932 unsigned long paddr; 933 spinlock_t *ptl; 934 pgste_t pgste; 935 pmd_t *pmdp; 936 pte_t *ptep; 937 938 pmdp = pmd_alloc_map(mm, addr); 939 if (unlikely(!pmdp)) 940 return -EFAULT; 941 942 ptl = pmd_lock(mm, pmdp); 943 if (!pmd_present(*pmdp)) { 944 /* Not yet mapped memory has a zero key */ 945 spin_unlock(ptl); 946 *key = 0; 947 return 0; 948 } 949 950 if (pmd_large(*pmdp)) { 951 paddr = pmd_val(*pmdp) & HPAGE_MASK; 952 paddr |= addr & ~HPAGE_MASK; 953 *key = page_get_storage_key(paddr); 954 spin_unlock(ptl); 955 return 0; 956 } 957 spin_unlock(ptl); 958 959 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 960 if (unlikely(!ptep)) 961 return -EFAULT; 962 963 pgste = pgste_get_lock(ptep); 964 *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 965 paddr = pte_val(*ptep) & PAGE_MASK; 966 if (!(pte_val(*ptep) & _PAGE_INVALID)) 967 *key = page_get_storage_key(paddr); 968 /* Reflect guest's logical view, not physical */ 969 *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 970 pgste_set_unlock(ptep, pgste); 971 pte_unmap_unlock(ptep, ptl); 972 return 0; 973} 974EXPORT_SYMBOL(get_guest_storage_key); 975 976/** 977 * pgste_perform_essa - perform ESSA actions on the PGSTE. 978 * @mm: the memory context. It must have PGSTEs, no check is performed here! 979 * @hva: the host virtual address of the page whose PGSTE is to be processed 980 * @orc: the specific action to perform, see the ESSA_SET_* macros. 981 * @oldpte: the PTE will be saved there if the pointer is not NULL. 982 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. 983 * 984 * Return: 1 if the page is to be added to the CBRL, otherwise 0, 985 * or < 0 in case of error. -EINVAL is returned for invalid values 986 * of orc, -EFAULT for invalid addresses. 987 */ 988int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, 989 unsigned long *oldpte, unsigned long *oldpgste) 990{ 991 struct vm_area_struct *vma; 992 unsigned long pgstev; 993 spinlock_t *ptl; 994 pgste_t pgste; 995 pte_t *ptep; 996 int res = 0; 997 998 WARN_ON_ONCE(orc > ESSA_MAX); 999 if (unlikely(orc > ESSA_MAX)) 1000 return -EINVAL; 1001 1002 vma = find_vma(mm, hva); 1003 if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) 1004 return -EFAULT; 1005 ptep = get_locked_pte(mm, hva, &ptl); 1006 if (unlikely(!ptep)) 1007 return -EFAULT; 1008 pgste = pgste_get_lock(ptep); 1009 pgstev = pgste_val(pgste); 1010 if (oldpte) 1011 *oldpte = pte_val(*ptep); 1012 if (oldpgste) 1013 *oldpgste = pgstev; 1014 1015 switch (orc) { 1016 case ESSA_GET_STATE: 1017 break; 1018 case ESSA_SET_STABLE: 1019 pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); 1020 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1021 break; 1022 case ESSA_SET_UNUSED: 1023 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1024 pgstev |= _PGSTE_GPS_USAGE_UNUSED; 1025 if (pte_val(*ptep) & _PAGE_INVALID) 1026 res = 1; 1027 break; 1028 case ESSA_SET_VOLATILE: 1029 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1030 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1031 if (pte_val(*ptep) & _PAGE_INVALID) 1032 res = 1; 1033 break; 1034 case ESSA_SET_POT_VOLATILE: 1035 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1036 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1037 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; 1038 break; 1039 } 1040 if (pgstev & _PGSTE_GPS_ZERO) { 1041 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1042 break; 1043 } 1044 if (!(pgstev & PGSTE_GC_BIT)) { 1045 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1046 res = 1; 1047 break; 1048 } 1049 break; 1050 case ESSA_SET_STABLE_RESIDENT: 1051 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1052 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1053 /* 1054 * Since the resident state can go away any time after this 1055 * call, we will not make this page resident. We can revisit 1056 * this decision if a guest will ever start using this. 1057 */ 1058 break; 1059 case ESSA_SET_STABLE_IF_RESIDENT: 1060 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1061 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1062 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1063 } 1064 break; 1065 case ESSA_SET_STABLE_NODAT: 1066 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1067 pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; 1068 break; 1069 default: 1070 /* we should never get here! */ 1071 break; 1072 } 1073 /* If we are discarding a page, set it to logical zero */ 1074 if (res) 1075 pgstev |= _PGSTE_GPS_ZERO; 1076 1077 pgste_val(pgste) = pgstev; 1078 pgste_set_unlock(ptep, pgste); 1079 pte_unmap_unlock(ptep, ptl); 1080 return res; 1081} 1082EXPORT_SYMBOL(pgste_perform_essa); 1083 1084/** 1085 * set_pgste_bits - set specific PGSTE bits. 1086 * @mm: the memory context. It must have PGSTEs, no check is performed here! 1087 * @hva: the host virtual address of the page whose PGSTE is to be processed 1088 * @bits: a bitmask representing the bits that will be touched 1089 * @value: the values of the bits to be written. Only the bits in the mask 1090 * will be written. 1091 * 1092 * Return: 0 on success, < 0 in case of error. 1093 */ 1094int set_pgste_bits(struct mm_struct *mm, unsigned long hva, 1095 unsigned long bits, unsigned long value) 1096{ 1097 struct vm_area_struct *vma; 1098 spinlock_t *ptl; 1099 pgste_t new; 1100 pte_t *ptep; 1101 1102 vma = find_vma(mm, hva); 1103 if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) 1104 return -EFAULT; 1105 ptep = get_locked_pte(mm, hva, &ptl); 1106 if (unlikely(!ptep)) 1107 return -EFAULT; 1108 new = pgste_get_lock(ptep); 1109 1110 pgste_val(new) &= ~bits; 1111 pgste_val(new) |= value & bits; 1112 1113 pgste_set_unlock(ptep, new); 1114 pte_unmap_unlock(ptep, ptl); 1115 return 0; 1116} 1117EXPORT_SYMBOL(set_pgste_bits); 1118 1119/** 1120 * get_pgste - get the current PGSTE for the given address. 1121 * @mm: the memory context. It must have PGSTEs, no check is performed here! 1122 * @hva: the host virtual address of the page whose PGSTE is to be processed 1123 * @pgstep: will be written with the current PGSTE for the given address. 1124 * 1125 * Return: 0 on success, < 0 in case of error. 1126 */ 1127int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) 1128{ 1129 struct vm_area_struct *vma; 1130 spinlock_t *ptl; 1131 pte_t *ptep; 1132 1133 vma = find_vma(mm, hva); 1134 if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) 1135 return -EFAULT; 1136 ptep = get_locked_pte(mm, hva, &ptl); 1137 if (unlikely(!ptep)) 1138 return -EFAULT; 1139 *pgstep = pgste_val(pgste_get(ptep)); 1140 pte_unmap_unlock(ptep, ptl); 1141 return 0; 1142} 1143EXPORT_SYMBOL(get_pgste); 1144#endif 1145