/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H

/*
 * Intel Physical Address Extension (PAE) Mode - three-level page
 * tables on PPro+ CPUs.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

/*
 * Report a corrupted page-table entry at each level.  PAE entries are
 * 64 bits wide: the pte variant prints the two 32-bit halves
 * (high then low), while pmd/pgd print the full 64-bit value.
 */
#define pte_ERROR(e)							\
	pr_err("%s:%d: bad pte %p(%08lx%08lx)\n",			\
	       __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
#define pmd_ERROR(e)							\
	pr_err("%s:%d: bad pmd %p(%016Lx)\n",				\
	       __FILE__, __LINE__, &(e), pmd_val(e))
#define pgd_ERROR(e)							\
	pr_err("%s:%d: bad pgd %p(%016Lx)\n",				\
	       __FILE__, __LINE__, &(e), pgd_val(e))

/*
 * Atomically replace a 64-bit page-table entry and return the old one.
 *
 * _pxx is the entry level (pte/pmd/pud) and is token-pasted to pick the
 * matching value type and constructor.  The try_cmpxchg64() loop retries
 * until the full 8-byte value is exchanged in a single atomic operation,
 * so other CPUs can never observe a half-written entry.  Returns the
 * previous entry wrapped via native_make_<level>().
 */
#define pxx_xchg64(_pxx, _ptr, _val) ({					\
	_pxx##val_t *_p = (_pxx##val_t *)_ptr;				\
	_pxx##val_t _o = *_p;						\
	do { } while (!try_cmpxchg64(_p, &_o, (_val)));			\
	native_make_##_pxx(_o);						\
})

/*
 * Rules for using set_pte: the pte being assigned *must* be
 * either not present or in a state where the hardware will
 * not attempt to update the pte.  In places where this is
 * not possible, use pte_get_and_clear to obtain the old pte
 * value and then use set_pte to update it.  -ben
 */
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
	/*
	 * Non-atomic 2x32-bit store: write the high half first, then the
	 * low half.  The low word carries the present bit, so by the time
	 * it lands the high word is already valid; the smp_wmb() keeps
	 * the two stores from being reordered.  Only safe under the rules
	 * stated above (entry not live for the hardware walker).
	 */
	WRITE_ONCE(ptep->pte_high, pte.pte_high);
	smp_wmb();
	WRITE_ONCE(ptep->pte_low, pte.pte_low);
}

/*
 * Set a pte with a single atomic 64-bit exchange - safe even when the
 * entry may be concurrently live for the hardware page walker.
 */
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	pxx_xchg64(pte, ptep, native_pte_val(pte));
}

/* Install a pmd entry with a single atomic 64-bit exchange. */
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
}

/*
 * Install a pud entry atomically.  Under page-table isolation the
 * top-level entry is first mirrored into the user-mode copy of the
 * page tables via pti_set_user_pgtbl() (on 32-bit PAE the pud is
 * folded into the pgd, hence the p4d.pgd access).
 */
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
#endif
	pxx_xchg64(pud, pudp, native_pud_val(pud));
}

/*
 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
 * entry, so clear the bottom half first and enforce ordering with a compiler
 * barrier.
 */
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	/* Low half holds the present bit: zero it first (see above). */
	WRITE_ONCE(ptep->pte_low, 0);
	smp_wmb();
	WRITE_ONCE(ptep->pte_high, 0);
}

/* Clear a pmd entry; same low-half-first ordering as native_pte_clear(). */
static inline void native_pmd_clear(pmd_t *pmdp)
{
	WRITE_ONCE(pmdp->pmd_low, 0);
	smp_wmb();
	WRITE_ONCE(pmdp->pmd_high, 0);
}

/*
 * Intentionally a no-op: clearing the top level on PAE needs the extra
 * handling done in pud_clear() below, which goes through set_pud().
 */
static inline void native_pud_clear(pud_t *pudp)
{
}

/* Zero a top-level (PAE pgd/pud) entry; see below for why no TLB flush. */
static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));

	/*
	 * According to Intel App note "TLBs, Paging-Structure Caches,
	 * and Their Invalidation", April 2007, document 317080-001,
	 * section 8.1: in PAE mode we explicitly have to flush the
	 * TLB via cr3 if the top-level pgd is changed...
	 *
	 * Currently all places where pud_clear() is called either have
	 * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or
	 * pud_clear_bad()), so we don't need TLB flush here.
	 */
}


#ifdef CONFIG_SMP
/*
 * On SMP, atomically read-and-zero an entry with a single 64-bit
 * exchange so another CPU (or the hardware walker) can never observe
 * a half-cleared entry.
 */
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
	return pxx_xchg64(pte, ptep, 0ULL);
}

static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
{
	return pxx_xchg64(pmd, pmdp, 0ULL);
}

static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
{
	return pxx_xchg64(pud, pudp, 0ULL);
}
#else
/*
 * On UP there is no cross-CPU race, so the cheaper non-atomic
 * "local" variants suffice.
 */
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif

#ifndef pmdp_establish
#define pmdp_establish pmdp_establish
/*
 * Atomically install @pmd at @pmdp and return the previous entry.
 * Used when the old entry may still be live for the hardware walker,
 * so the replacement must never expose a torn 64-bit value.
 */
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old;

	/*
	 * If pmd has present bit cleared we can get away without expensive
	 * cmpxchg64: we can update pmdp half-by-half without racing with
	 * anybody.
	 */
	if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
		/* xchg acts as a barrier before setting of the high bits */
		old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
		old.pmd_high = READ_ONCE(pmdp->pmd_high);
		WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);

		return old;
	}

	/* New entry is present: fall back to a full atomic 64-bit exchange. */
	return pxx_xchg64(pmd, pmdp, pmd.pmd);
}
#endif

/*
 * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
 * are !pte_none() && !pte_present().
 *
 * Format of swap PTEs:
 *
 *   6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
 *   3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
 *   < type -> <---------------------- offset ----------------------
 *
 *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
 *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 *   --------------------------------------------> 0 E 0 0 0 0 0 0 0
 *
 *   E is the exclusive marker that is not stored in swap entries.
 */
#define SWP_TYPE_BITS		5
#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1)

/* Lowest PTE bit usable for the offset: keeps it above _PAGE_BIT_PROTNONE. */
#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)

/* We always extract/encode the offset by shifting it all the way up, and then down again */
#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)

#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
#define __swp_type(x)			(((x).val) & _SWP_TYPE_MASK)
#define __swp_offset(x)			((x).val >> SWP_TYPE_BITS)
#define __swp_entry(type, offset)	((swp_entry_t){((type) & _SWP_TYPE_MASK) \
					| (offset) << SWP_TYPE_BITS})

/*
 * Normally, __swp_entry() converts from arch-independent swp_entry_t to
 * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
 * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
 * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
 * __swp_entry_to_pte() through the following helper macro based on 64bit
 * __swp_entry().
 *
 * NOTE(review): the offset bits are stored inverted ('~') - this matches
 * the PTE inversion scheme of <asm/pgtable-invert.h> (presumably the L1TF
 * mitigation; confirm against that header).
 */
#define __swp_pteval_entry(type, offset) ((pteval_t) { \
	(~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
	| ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })

#define __swp_entry_to_pte(x)	((pte_t){ .pte = \
		__swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
/*
 * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
 * swp_entry_t, but also has to convert it from 64bit to the 32bit
 * intermediate representation, using the following macros based on 64bit
 * __swp_type() and __swp_offset().
 */
#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))

#define __pte_to_swp_entry(pte)	(__swp_entry(__pteval_swp_type(pte), \
					     __pteval_swp_offset(pte)))

/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
#define _PAGE_SWP_EXCLUSIVE	_PAGE_PSE

#include <asm/pgtable-invert.h>

#endif /* _ASM_X86_PGTABLE_3LEVEL_H */