162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  mm/mprotect.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *  (C) Copyright 1994 Linus Torvalds
662306a36Sopenharmony_ci *  (C) Copyright 2002 Christoph Hellwig
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
962306a36Sopenharmony_ci *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include <linux/pagewalk.h>
1362306a36Sopenharmony_ci#include <linux/hugetlb.h>
1462306a36Sopenharmony_ci#include <linux/shm.h>
1562306a36Sopenharmony_ci#include <linux/mman.h>
1662306a36Sopenharmony_ci#include <linux/fs.h>
1762306a36Sopenharmony_ci#include <linux/highmem.h>
1862306a36Sopenharmony_ci#include <linux/security.h>
1962306a36Sopenharmony_ci#include <linux/mempolicy.h>
2062306a36Sopenharmony_ci#include <linux/personality.h>
2162306a36Sopenharmony_ci#include <linux/syscalls.h>
2262306a36Sopenharmony_ci#include <linux/swap.h>
2362306a36Sopenharmony_ci#include <linux/swapops.h>
2462306a36Sopenharmony_ci#include <linux/mmu_notifier.h>
2562306a36Sopenharmony_ci#include <linux/migrate.h>
2662306a36Sopenharmony_ci#include <linux/perf_event.h>
2762306a36Sopenharmony_ci#include <linux/pkeys.h>
2862306a36Sopenharmony_ci#include <linux/ksm.h>
2962306a36Sopenharmony_ci#include <linux/uaccess.h>
3062306a36Sopenharmony_ci#include <linux/mm_inline.h>
3162306a36Sopenharmony_ci#include <linux/pgtable.h>
3262306a36Sopenharmony_ci#include <linux/sched/sysctl.h>
3362306a36Sopenharmony_ci#include <linux/userfaultfd_k.h>
3462306a36Sopenharmony_ci#include <linux/memory-tiers.h>
3562306a36Sopenharmony_ci#include <asm/cacheflush.h>
3662306a36Sopenharmony_ci#include <asm/mmu_context.h>
3762306a36Sopenharmony_ci#include <asm/tlbflush.h>
3862306a36Sopenharmony_ci#include <asm/tlb.h>
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci#include "internal.h"
4162306a36Sopenharmony_ci#include <linux/hck/lite_hck_jit_memory.h>
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cibool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
4462306a36Sopenharmony_ci			     pte_t pte)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	struct page *page;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
4962306a36Sopenharmony_ci		return false;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	/* Don't touch entries that are not even readable. */
5262306a36Sopenharmony_ci	if (pte_protnone(pte))
5362306a36Sopenharmony_ci		return false;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	/* Do we need write faults for softdirty tracking? */
5662306a36Sopenharmony_ci	if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
5762306a36Sopenharmony_ci		return false;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	/* Do we need write faults for uffd-wp tracking? */
6062306a36Sopenharmony_ci	if (userfaultfd_pte_wp(vma, pte))
6162306a36Sopenharmony_ci		return false;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	if (!(vma->vm_flags & VM_SHARED)) {
6462306a36Sopenharmony_ci		/*
6562306a36Sopenharmony_ci		 * Writable MAP_PRIVATE mapping: We can only special-case on
6662306a36Sopenharmony_ci		 * exclusive anonymous pages, because we know that our
6762306a36Sopenharmony_ci		 * write-fault handler similarly would map them writable without
6862306a36Sopenharmony_ci		 * any additional checks while holding the PT lock.
6962306a36Sopenharmony_ci		 */
7062306a36Sopenharmony_ci		page = vm_normal_page(vma, addr, pte);
7162306a36Sopenharmony_ci		return page && PageAnon(page) && PageAnonExclusive(page);
7262306a36Sopenharmony_ci	}
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	/*
7562306a36Sopenharmony_ci	 * Writable MAP_SHARED mapping: "clean" might indicate that the FS still
7662306a36Sopenharmony_ci	 * needs a real write-fault for writenotify
7762306a36Sopenharmony_ci	 * (see vma_wants_writenotify()). If "dirty", the assumption is that the
7862306a36Sopenharmony_ci	 * FS was already notified and we can simply mark the PTE writable
7962306a36Sopenharmony_ci	 * just like the write-fault handler would do.
8062306a36Sopenharmony_ci	 */
8162306a36Sopenharmony_ci	return pte_dirty(pte);
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
/*
 * change_pte_range - apply a protection change to the PTEs that map
 * [addr, end) underneath @pmd.
 * @tlb:      mmu_gather used to batch the TLB flushes requested here
 * @vma:      the VMA being changed
 * @pmd:      the PMD entry whose PTE table is walked
 * @addr:     first address to process
 * @end:      address one past the last byte to process
 * @newprot:  protection bits applied to present PTEs via pte_modify()
 * @cp_flags: MM_CP_* flags (PROT_NUMA, UFFD_WP, UFFD_WP_RESOLVE,
 *            TRY_CHANGE_WRITABLE are consulted here)
 *
 * The walk runs with the PTE table mapped and locked by
 * pte_offset_map_lock() and treats each entry as one of three kinds:
 * present, swap-style (non-present, non-none), or none.
 *
 * Returns the number of entries that were updated, or -EAGAIN when the PTE
 * table could not be mapped and locked.
 */
static long change_pte_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	long pages = 0;
	int target_node = NUMA_NO_NODE;
	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

	tlb_change_page_size(tlb, PAGE_SIZE);
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	/* No PTE table could be mapped; the caller is expected to retry. */
	if (!pte)
		return -EAGAIN;

	/* Get target node for single threaded private VMAs */
	if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
	    atomic_read(&vma->vm_mm->mm_users) == 1)
		target_node = numa_node_id();

	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();
	/*
	 * Note: every "continue" below jumps to the loop condition, which
	 * still advances both pte and addr to the next entry.
	 */
	do {
		oldpte = ptep_get(pte);
		if (pte_present(oldpte)) {
			pte_t ptent;

			/*
			 * Avoid trapping faults against the zero or KSM
			 * pages. See similar comment in change_huge_pmd.
			 */
			if (prot_numa) {
				struct page *page;
				int nid;
				bool toptier;

				/* Avoid TLB flush if possible */
				if (pte_protnone(oldpte))
					continue;

				page = vm_normal_page(vma, addr, oldpte);
				if (!page || is_zone_device_page(page) || PageKsm(page))
					continue;

				/* Also skip shared copy-on-write pages */
				if (is_cow_mapping(vma->vm_flags) &&
				    page_count(page) != 1)
					continue;

				/*
				 * While migration can move some dirty pages,
				 * it cannot move them all from MIGRATE_ASYNC
				 * context.
				 */
				if (page_is_file_lru(page) && PageDirty(page))
					continue;

				/*
				 * Don't mess with PTEs if page is already on the node
				 * a single-threaded process is running on.
				 */
				nid = page_to_nid(page);
				if (target_node == nid)
					continue;
				toptier = node_is_toptier(nid);

				/*
				 * Skip scanning top tier node if normal numa
				 * balancing is disabled
				 */
				if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
				    toptier)
					continue;
				/*
				 * In memory-tiering mode, record the current
				 * time (in msecs) on pages outside the top tier.
				 */
				if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
				    !toptier)
					xchg_page_access_time(page,
						jiffies_to_msecs(jiffies));
			}

			/*
			 * Start/commit bracket the PTE modification; the new
			 * value is built from the PTE returned by the start.
			 */
			oldpte = ptep_modify_prot_start(vma, addr, pte);
			ptent = pte_modify(oldpte, newprot);

			/* Set or clear the uffd-wp bit as requested. */
			if (uffd_wp)
				ptent = pte_mkuffd_wp(ptent);
			else if (uffd_wp_resolve)
				ptent = pte_clear_uffd_wp(ptent);

			/*
			 * In some writable, shared mappings, we might want
			 * to catch actual write access -- see
			 * vma_wants_writenotify().
			 *
			 * In all writable, private mappings, we have to
			 * properly handle COW.
			 *
			 * In both cases, we can sometimes still change PTEs
			 * writable and avoid the write-fault handler, for
			 * example, if a PTE is already dirty and no other
			 * COW or special handling is required.
			 */
			if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
			    !pte_write(ptent) &&
			    can_change_pte_writable(vma, addr, ptent))
				ptent = pte_mkwrite(ptent, vma);

			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
			/* Only queue a TLB flush when the change requires one. */
			if (pte_needs_flush(oldpte, ptent))
				tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
			pages++;
		} else if (is_swap_pte(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);
			pte_t newpte;

			if (is_writable_migration_entry(entry)) {
				struct page *page = pfn_swap_entry_to_page(entry);

				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				if (PageAnon(page))
					entry = make_readable_exclusive_migration_entry(
							     swp_offset(entry));
				else
					entry = make_readable_migration_entry(swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
			} else if (is_writable_device_private_entry(entry)) {
				/*
				 * We do not preserve soft-dirtiness. See
				 * copy_nonpresent_pte() for explanation.
				 */
				entry = make_readable_device_private_entry(
							swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_uffd_wp(oldpte))
					newpte = pte_swp_mkuffd_wp(newpte);
			} else if (is_writable_device_exclusive_entry(entry)) {
				/* Downgrade to readable, keeping both tracking bits. */
				entry = make_readable_device_exclusive_entry(
							swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				if (pte_swp_uffd_wp(oldpte))
					newpte = pte_swp_mkuffd_wp(newpte);
			} else if (is_pte_marker_entry(entry)) {
				/*
				 * Ignore error swap entries unconditionally,
				 * because any access should sigbus anyway.
				 */
				if (is_poisoned_swp_entry(entry))
					continue;
				/*
				 * If this is uffd-wp pte marker and we'd like
				 * to unprotect it, drop it; the next page
				 * fault will trigger without uffd trapping.
				 */
				if (uffd_wp_resolve) {
					pte_clear(vma->vm_mm, addr, pte);
					pages++;
				}
				continue;
			} else {
				/* Any other swap-style entry is kept as is. */
				newpte = oldpte;
			}

			if (uffd_wp)
				newpte = pte_swp_mkuffd_wp(newpte);
			else if (uffd_wp_resolve)
				newpte = pte_swp_clear_uffd_wp(newpte);

			/* Write back (and count) only entries that changed. */
			if (!pte_same(oldpte, newpte)) {
				set_pte_at(vma->vm_mm, addr, pte, newpte);
				pages++;
			}
		} else {
			/* It must be a none page, or what else?.. */
			WARN_ON_ONCE(!pte_none(oldpte));

			/*
			 * Nobody plays with any none ptes besides
			 * userfaultfd when applying the protections.
			 */
			if (likely(!uffd_wp))
				continue;

			if (userfaultfd_wp_use_markers(vma)) {
				/*
				 * For file-backed mem, we need to be able to
				 * wr-protect a none pte, because even if the
				 * pte is none, the page/swap cache could
				 * exist.  Do that by installing a marker.
				 */
				set_pte_at(vma->vm_mm, addr, pte,
					   make_pte_marker(PTE_MARKER_UFFD_WP));
				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	/* The loop left pte one past the last entry; unmap via the last one. */
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci/*
29362306a36Sopenharmony_ci * Return true if we want to split THPs into PTE mappings in change
29462306a36Sopenharmony_ci * protection procedure, false otherwise.
29562306a36Sopenharmony_ci */
29662306a36Sopenharmony_cistatic inline bool
29762306a36Sopenharmony_cipgtable_split_needed(struct vm_area_struct *vma, unsigned long cp_flags)
29862306a36Sopenharmony_ci{
29962306a36Sopenharmony_ci	/*
30062306a36Sopenharmony_ci	 * pte markers only resides in pte level, if we need pte markers,
30162306a36Sopenharmony_ci	 * we need to split.  We cannot wr-protect shmem thp because file
30262306a36Sopenharmony_ci	 * thp is handled differently when split by erasing the pmd so far.
30362306a36Sopenharmony_ci	 */
30462306a36Sopenharmony_ci	return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma);
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci/*
30862306a36Sopenharmony_ci * Return true if we want to populate pgtables in change protection
30962306a36Sopenharmony_ci * procedure, false otherwise
31062306a36Sopenharmony_ci */
31162306a36Sopenharmony_cistatic inline bool
31262306a36Sopenharmony_cipgtable_populate_needed(struct vm_area_struct *vma, unsigned long cp_flags)
31362306a36Sopenharmony_ci{
31462306a36Sopenharmony_ci	/* If not within ioctl(UFFDIO_WRITEPROTECT), then don't bother */
31562306a36Sopenharmony_ci	if (!(cp_flags & MM_CP_UFFD_WP))
31662306a36Sopenharmony_ci		return false;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	/* Populate if the userfaultfd mode requires pte markers */
31962306a36Sopenharmony_ci	return userfaultfd_wp_use_markers(vma);
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
/*
 * Populate the PTE table underneath @pmd when pgtable_populate_needed()
 * asks for it.  A failing {pte|pmd|...}_alloc() is treated the same way as
 * a pgtable allocation failure during a page fault: kick OOM and return an
 * error.
 *
 * Evaluates to 0 when nothing had to be done or the allocation succeeded,
 * and to -ENOMEM when pte_alloc() failed.
 */
#define  change_pmd_prepare(vma, pmd, cp_flags)				\
	({								\
		long err = 0;						\
		if (unlikely(pgtable_populate_needed(vma, cp_flags)) &&	\
		    pte_alloc(vma->vm_mm, pmd))				\
			err = -ENOMEM;					\
		err;							\
	})
33762306a36Sopenharmony_ci
/*
 * The pud/p4d/pgd counterpart of change_pmd_prepare().  It is kept separate
 * because pte_alloc() returns 0 on success, whereas {pmd|pud|p4d}_alloc()
 * return a valid pointer on success (so a NULL result means failure here).
 *
 * @high is the upper-level entry and @low names the level to allocate
 * beneath it.  Evaluates to 0 on success or when no population was
 * requested, and to -ENOMEM when the allocation failed.
 */
#define  change_prepare(vma, high, low, addr, cp_flags)			\
	  ({								\
		long err = 0;						\
		if (unlikely(pgtable_populate_needed(vma, cp_flags)) &&	\
		    !low##_alloc(vma->vm_mm, high, addr))		\
			err = -ENOMEM;					\
		err;							\
	})
35362306a36Sopenharmony_ci
/*
 * change_pmd_range - apply a protection change to the PMD entries that
 * cover [addr, end) underneath @pud.
 *
 * Huge (or swap/devmap) PMDs are either changed in place through
 * change_huge_pmd() or split first; everything else is handed to
 * change_pte_range().
 *
 * Returns the accumulated page count, or the error reported by
 * change_pmd_prepare() when populating a PTE table failed.
 */
static inline long change_pmd_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	pmd_t *pmd;
	unsigned long next;
	long pages = 0;
	unsigned long nr_huge_updates = 0;
	struct mmu_notifier_range range;

	/* range.start == 0 doubles as "notifier not started yet". */
	range.start = 0;

	pmd = pmd_offset(pud, addr);
	do {
		long ret;
		pmd_t _pmd;
again:
		next = pmd_addr_end(addr, end);

		/* Make sure a PTE table exists if this operation needs one. */
		ret = change_pmd_prepare(vma, pmd, cp_flags);
		if (ret) {
			pages = ret;
			break;
		}

		if (pmd_none(*pmd))
			goto next;

		/*
		 * invoke the mmu notifier if the pmd is populated; this is
		 * done once, for the first populated pmd, and covers the rest
		 * of the range up to end.
		 */
		if (!range.start) {
			mmu_notifier_range_init(&range,
				MMU_NOTIFY_PROTECTION_VMA, 0,
				vma->vm_mm, addr, end);
			mmu_notifier_invalidate_range_start(&range);
		}

		_pmd = pmdp_get_lockless(pmd);
		if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) {
			/*
			 * Split when only part of the huge pmd is covered or
			 * when the operation needs pte-level entries.
			 */
			if ((next - addr != HPAGE_PMD_SIZE) ||
			    pgtable_split_needed(vma, cp_flags)) {
				__split_huge_pmd(vma, pmd, addr, false, NULL);
				/*
				 * For file-backed, the pmd could have been
				 * cleared; make sure pmd populated if
				 * necessary, then fall-through to pte level.
				 */
				ret = change_pmd_prepare(vma, pmd, cp_flags);
				if (ret) {
					pages = ret;
					break;
				}
			} else {
				ret = change_huge_pmd(tlb, vma, pmd,
						addr, newprot, cp_flags);
				/*
				 * Any non-zero result means the huge pmd was
				 * dealt with; only HPAGE_PMD_NR is counted as
				 * an update.  Zero falls through to pte level.
				 */
				if (ret) {
					if (ret == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					goto next;
				}
			}
			/* fall through, the trans huge pmd just split */
		}

		ret = change_pte_range(tlb, vma, pmd, addr, next, newprot,
				       cp_flags);
		/* A negative result means "try this pmd again". */
		if (ret < 0)
			goto again;
		pages += ret;
next:
		cond_resched();
	} while (pmd++, addr = next, addr != end);

	/* Close the notifier section only if it was ever opened. */
	if (range.start)
		mmu_notifier_invalidate_range_end(&range);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_cistatic inline long change_pud_range(struct mmu_gather *tlb,
43962306a36Sopenharmony_ci		struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
44062306a36Sopenharmony_ci		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
44162306a36Sopenharmony_ci{
44262306a36Sopenharmony_ci	pud_t *pud;
44362306a36Sopenharmony_ci	unsigned long next;
44462306a36Sopenharmony_ci	long pages = 0, ret;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	pud = pud_offset(p4d, addr);
44762306a36Sopenharmony_ci	do {
44862306a36Sopenharmony_ci		next = pud_addr_end(addr, end);
44962306a36Sopenharmony_ci		ret = change_prepare(vma, pud, pmd, addr, cp_flags);
45062306a36Sopenharmony_ci		if (ret)
45162306a36Sopenharmony_ci			return ret;
45262306a36Sopenharmony_ci		if (pud_none_or_clear_bad(pud))
45362306a36Sopenharmony_ci			continue;
45462306a36Sopenharmony_ci		pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
45562306a36Sopenharmony_ci					  cp_flags);
45662306a36Sopenharmony_ci	} while (pud++, addr = next, addr != end);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	return pages;
45962306a36Sopenharmony_ci}
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_cistatic inline long change_p4d_range(struct mmu_gather *tlb,
46262306a36Sopenharmony_ci		struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
46362306a36Sopenharmony_ci		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
46462306a36Sopenharmony_ci{
46562306a36Sopenharmony_ci	p4d_t *p4d;
46662306a36Sopenharmony_ci	unsigned long next;
46762306a36Sopenharmony_ci	long pages = 0, ret;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	p4d = p4d_offset(pgd, addr);
47062306a36Sopenharmony_ci	do {
47162306a36Sopenharmony_ci		next = p4d_addr_end(addr, end);
47262306a36Sopenharmony_ci		ret = change_prepare(vma, p4d, pud, addr, cp_flags);
47362306a36Sopenharmony_ci		if (ret)
47462306a36Sopenharmony_ci			return ret;
47562306a36Sopenharmony_ci		if (p4d_none_or_clear_bad(p4d))
47662306a36Sopenharmony_ci			continue;
47762306a36Sopenharmony_ci		pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
47862306a36Sopenharmony_ci					  cp_flags);
47962306a36Sopenharmony_ci	} while (p4d++, addr = next, addr != end);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	return pages;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_cistatic long change_protection_range(struct mmu_gather *tlb,
48562306a36Sopenharmony_ci		struct vm_area_struct *vma, unsigned long addr,
48662306a36Sopenharmony_ci		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
48762306a36Sopenharmony_ci{
48862306a36Sopenharmony_ci	struct mm_struct *mm = vma->vm_mm;
48962306a36Sopenharmony_ci	pgd_t *pgd;
49062306a36Sopenharmony_ci	unsigned long next;
49162306a36Sopenharmony_ci	long pages = 0, ret;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	BUG_ON(addr >= end);
49462306a36Sopenharmony_ci	pgd = pgd_offset(mm, addr);
49562306a36Sopenharmony_ci	tlb_start_vma(tlb, vma);
49662306a36Sopenharmony_ci	do {
49762306a36Sopenharmony_ci		next = pgd_addr_end(addr, end);
49862306a36Sopenharmony_ci		ret = change_prepare(vma, pgd, p4d, addr, cp_flags);
49962306a36Sopenharmony_ci		if (ret) {
50062306a36Sopenharmony_ci			pages = ret;
50162306a36Sopenharmony_ci			break;
50262306a36Sopenharmony_ci		}
50362306a36Sopenharmony_ci		if (pgd_none_or_clear_bad(pgd))
50462306a36Sopenharmony_ci			continue;
50562306a36Sopenharmony_ci		pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
50662306a36Sopenharmony_ci					  cp_flags);
50762306a36Sopenharmony_ci	} while (pgd++, addr = next, addr != end);
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	tlb_end_vma(tlb, vma);
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	return pages;
51262306a36Sopenharmony_ci}
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_cilong change_protection(struct mmu_gather *tlb,
51562306a36Sopenharmony_ci		       struct vm_area_struct *vma, unsigned long start,
51662306a36Sopenharmony_ci		       unsigned long end, unsigned long cp_flags)
51762306a36Sopenharmony_ci{
51862306a36Sopenharmony_ci	pgprot_t newprot = vma->vm_page_prot;
51962306a36Sopenharmony_ci	long pages;
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci#ifdef CONFIG_NUMA_BALANCING
52462306a36Sopenharmony_ci	/*
52562306a36Sopenharmony_ci	 * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking)
52662306a36Sopenharmony_ci	 * are expected to reflect their requirements via VMA flags such that
52762306a36Sopenharmony_ci	 * vma_set_page_prot() will adjust vma->vm_page_prot accordingly.
52862306a36Sopenharmony_ci	 */
52962306a36Sopenharmony_ci	if (cp_flags & MM_CP_PROT_NUMA)
53062306a36Sopenharmony_ci		newprot = PAGE_NONE;
53162306a36Sopenharmony_ci#else
53262306a36Sopenharmony_ci	WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA);
53362306a36Sopenharmony_ci#endif
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	if (is_vm_hugetlb_page(vma))
53662306a36Sopenharmony_ci		pages = hugetlb_change_protection(vma, start, end, newprot,
53762306a36Sopenharmony_ci						  cp_flags);
53862306a36Sopenharmony_ci	else
53962306a36Sopenharmony_ci		pages = change_protection_range(tlb, vma, start, end, newprot,
54062306a36Sopenharmony_ci						cp_flags);
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	return pages;
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_cistatic int prot_none_pte_entry(pte_t *pte, unsigned long addr,
54662306a36Sopenharmony_ci			       unsigned long next, struct mm_walk *walk)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	return pfn_modify_allowed(pte_pfn(ptep_get(pte)),
54962306a36Sopenharmony_ci				  *(pgprot_t *)(walk->private)) ?
55062306a36Sopenharmony_ci		0 : -EACCES;
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_cistatic int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
55462306a36Sopenharmony_ci				   unsigned long addr, unsigned long next,
55562306a36Sopenharmony_ci				   struct mm_walk *walk)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	return pfn_modify_allowed(pte_pfn(ptep_get(pte)),
55862306a36Sopenharmony_ci				  *(pgprot_t *)(walk->private)) ?
55962306a36Sopenharmony_ci		0 : -EACCES;
56062306a36Sopenharmony_ci}
56162306a36Sopenharmony_ci
/*
 * test_walk callback of prot_none_walk_ops.  It ignores its arguments and
 * always returns 0.
 * NOTE(review): presumably 0 tells the page walker not to skip any VMA;
 * confirm against the pagewalk documentation.
 */
static int prot_none_test(unsigned long addr, unsigned long next,
			  struct mm_walk *walk)
{
	return 0;
}
56762306a36Sopenharmony_ci
/*
 * Page-walk callbacks that mprotect_fixup() passes to walk_page_range() for
 * its PROT_NONE PFN permission check.  The per-entry callbacks return
 * -EACCES when pfn_modify_allowed() rejects the new protection.
 */
static const struct mm_walk_ops prot_none_walk_ops = {
	.pte_entry		= prot_none_pte_entry,
	.hugetlb_entry		= prot_none_hugetlb_entry,
	.test_walk		= prot_none_test,
	.walk_lock		= PGWALK_WRLOCK,
};
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ciint
57662306a36Sopenharmony_cimprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
57762306a36Sopenharmony_ci	       struct vm_area_struct *vma, struct vm_area_struct **pprev,
57862306a36Sopenharmony_ci	       unsigned long start, unsigned long end, unsigned long newflags)
57962306a36Sopenharmony_ci{
58062306a36Sopenharmony_ci	struct mm_struct *mm = vma->vm_mm;
58162306a36Sopenharmony_ci	unsigned long oldflags = vma->vm_flags;
58262306a36Sopenharmony_ci	long nrpages = (end - start) >> PAGE_SHIFT;
58362306a36Sopenharmony_ci	unsigned int mm_cp_flags = 0;
58462306a36Sopenharmony_ci	unsigned long charged = 0;
58562306a36Sopenharmony_ci	pgoff_t pgoff;
58662306a36Sopenharmony_ci	int error;
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci	if (newflags == oldflags) {
58962306a36Sopenharmony_ci		*pprev = vma;
59062306a36Sopenharmony_ci		return 0;
59162306a36Sopenharmony_ci	}
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	/*
59462306a36Sopenharmony_ci	 * Do PROT_NONE PFN permission checks here when we can still
59562306a36Sopenharmony_ci	 * bail out without undoing a lot of state. This is a rather
59662306a36Sopenharmony_ci	 * uncommon case, so doesn't need to be very optimized.
59762306a36Sopenharmony_ci	 */
59862306a36Sopenharmony_ci	if (arch_has_pfn_modify_check() &&
59962306a36Sopenharmony_ci	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
60062306a36Sopenharmony_ci	    (newflags & VM_ACCESS_FLAGS) == 0) {
60162306a36Sopenharmony_ci		pgprot_t new_pgprot = vm_get_page_prot(newflags);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci		error = walk_page_range(current->mm, start, end,
60462306a36Sopenharmony_ci				&prot_none_walk_ops, &new_pgprot);
60562306a36Sopenharmony_ci		if (error)
60662306a36Sopenharmony_ci			return error;
60762306a36Sopenharmony_ci	}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	/*
61062306a36Sopenharmony_ci	 * If we make a private mapping writable we increase our commit;
61162306a36Sopenharmony_ci	 * but (without finer accounting) cannot reduce our commit if we
61262306a36Sopenharmony_ci	 * make it unwritable again. hugetlb mapping were accounted for
61362306a36Sopenharmony_ci	 * even if read-only so there is no need to account for them here
61462306a36Sopenharmony_ci	 */
61562306a36Sopenharmony_ci	if (newflags & VM_WRITE) {
61662306a36Sopenharmony_ci		/* Check space limits when area turns into data. */
61762306a36Sopenharmony_ci		if (!may_expand_vm(mm, newflags, nrpages) &&
61862306a36Sopenharmony_ci				may_expand_vm(mm, oldflags, nrpages))
61962306a36Sopenharmony_ci			return -ENOMEM;
62062306a36Sopenharmony_ci		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
62162306a36Sopenharmony_ci						VM_SHARED|VM_NORESERVE))) {
62262306a36Sopenharmony_ci			charged = nrpages;
62362306a36Sopenharmony_ci			if (security_vm_enough_memory_mm(mm, charged))
62462306a36Sopenharmony_ci				return -ENOMEM;
62562306a36Sopenharmony_ci			newflags |= VM_ACCOUNT;
62662306a36Sopenharmony_ci		}
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	/*
63062306a36Sopenharmony_ci	 * First try to merge with previous and/or next vma.
63162306a36Sopenharmony_ci	 */
63262306a36Sopenharmony_ci	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
63362306a36Sopenharmony_ci	*pprev = vma_merge(vmi, mm, *pprev, start, end, newflags,
63462306a36Sopenharmony_ci			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
63562306a36Sopenharmony_ci			   vma->vm_userfaultfd_ctx, anon_vma_name(vma));
63662306a36Sopenharmony_ci	if (*pprev) {
63762306a36Sopenharmony_ci		vma = *pprev;
63862306a36Sopenharmony_ci		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
63962306a36Sopenharmony_ci		goto success;
64062306a36Sopenharmony_ci	}
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	*pprev = vma;
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	if (start != vma->vm_start) {
64562306a36Sopenharmony_ci		error = split_vma(vmi, vma, start, 1);
64662306a36Sopenharmony_ci		if (error)
64762306a36Sopenharmony_ci			goto fail;
64862306a36Sopenharmony_ci	}
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	if (end != vma->vm_end) {
65162306a36Sopenharmony_ci		error = split_vma(vmi, vma, end, 0);
65262306a36Sopenharmony_ci		if (error)
65362306a36Sopenharmony_ci			goto fail;
65462306a36Sopenharmony_ci	}
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_cisuccess:
65762306a36Sopenharmony_ci	/*
65862306a36Sopenharmony_ci	 * vm_flags and vm_page_prot are protected by the mmap_lock
65962306a36Sopenharmony_ci	 * held in write mode.
66062306a36Sopenharmony_ci	 */
66162306a36Sopenharmony_ci	vma_start_write(vma);
66262306a36Sopenharmony_ci	vm_flags_reset(vma, newflags);
66362306a36Sopenharmony_ci	if (vma_wants_manual_pte_write_upgrade(vma))
66462306a36Sopenharmony_ci		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
66562306a36Sopenharmony_ci	vma_set_page_prot(vma);
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	change_protection(tlb, vma, start, end, mm_cp_flags);
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	/*
67062306a36Sopenharmony_ci	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
67162306a36Sopenharmony_ci	 * fault on access.
67262306a36Sopenharmony_ci	 */
67362306a36Sopenharmony_ci	if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
67462306a36Sopenharmony_ci			(newflags & VM_WRITE)) {
67562306a36Sopenharmony_ci		populate_vma_page_range(vma, start, end, NULL);
67662306a36Sopenharmony_ci	}
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	vm_stat_account(mm, oldflags, -nrpages);
67962306a36Sopenharmony_ci	vm_stat_account(mm, newflags, nrpages);
68062306a36Sopenharmony_ci	perf_event_mmap(vma);
68162306a36Sopenharmony_ci	return 0;
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_cifail:
68462306a36Sopenharmony_ci	vm_unacct_memory(charged);
68562306a36Sopenharmony_ci	return error;
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci/*
68962306a36Sopenharmony_ci * pkey==-1 when doing a legacy mprotect()
69062306a36Sopenharmony_ci */
69162306a36Sopenharmony_cistatic int do_mprotect_pkey(unsigned long start, size_t len,
69262306a36Sopenharmony_ci		unsigned long prot, int pkey)
69362306a36Sopenharmony_ci{
69462306a36Sopenharmony_ci	unsigned long nstart, end, tmp, reqprot;
69562306a36Sopenharmony_ci	struct vm_area_struct *vma, *prev;
69662306a36Sopenharmony_ci	int error;
69762306a36Sopenharmony_ci	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
69862306a36Sopenharmony_ci	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
69962306a36Sopenharmony_ci				(prot & PROT_READ);
70062306a36Sopenharmony_ci	struct mmu_gather tlb;
70162306a36Sopenharmony_ci	struct vma_iterator vmi;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	start = untagged_addr(start);
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	if (prot & PROT_EXEC) {
70662306a36Sopenharmony_ci		CALL_HCK_LITE_HOOK(find_jit_memory_lhck, current, start, len, &error);
70762306a36Sopenharmony_ci		if (error) {
70862306a36Sopenharmony_ci			pr_info("JITINFO: mprotect protection triggered");
70962306a36Sopenharmony_ci			return error;
71062306a36Sopenharmony_ci		}
71162306a36Sopenharmony_ci	}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
71462306a36Sopenharmony_ci	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
71562306a36Sopenharmony_ci		return -EINVAL;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	if (start & ~PAGE_MASK)
71862306a36Sopenharmony_ci		return -EINVAL;
71962306a36Sopenharmony_ci	if (!len)
72062306a36Sopenharmony_ci		return 0;
72162306a36Sopenharmony_ci	len = PAGE_ALIGN(len);
72262306a36Sopenharmony_ci	end = start + len;
72362306a36Sopenharmony_ci	if (end <= start)
72462306a36Sopenharmony_ci		return -ENOMEM;
72562306a36Sopenharmony_ci	if (!arch_validate_prot(prot, start))
72662306a36Sopenharmony_ci		return -EINVAL;
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	reqprot = prot;
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	if (mmap_write_lock_killable(current->mm))
73162306a36Sopenharmony_ci		return -EINTR;
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	/*
73462306a36Sopenharmony_ci	 * If userspace did not allocate the pkey, do not let
73562306a36Sopenharmony_ci	 * them use it here.
73662306a36Sopenharmony_ci	 */
73762306a36Sopenharmony_ci	error = -EINVAL;
73862306a36Sopenharmony_ci	if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
73962306a36Sopenharmony_ci		goto out;
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	vma_iter_init(&vmi, current->mm, start);
74262306a36Sopenharmony_ci	vma = vma_find(&vmi, end);
74362306a36Sopenharmony_ci	error = -ENOMEM;
74462306a36Sopenharmony_ci	if (!vma)
74562306a36Sopenharmony_ci		goto out;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	if (unlikely(grows & PROT_GROWSDOWN)) {
74862306a36Sopenharmony_ci		if (vma->vm_start >= end)
74962306a36Sopenharmony_ci			goto out;
75062306a36Sopenharmony_ci		start = vma->vm_start;
75162306a36Sopenharmony_ci		error = -EINVAL;
75262306a36Sopenharmony_ci		if (!(vma->vm_flags & VM_GROWSDOWN))
75362306a36Sopenharmony_ci			goto out;
75462306a36Sopenharmony_ci	} else {
75562306a36Sopenharmony_ci		if (vma->vm_start > start)
75662306a36Sopenharmony_ci			goto out;
75762306a36Sopenharmony_ci		if (unlikely(grows & PROT_GROWSUP)) {
75862306a36Sopenharmony_ci			end = vma->vm_end;
75962306a36Sopenharmony_ci			error = -EINVAL;
76062306a36Sopenharmony_ci			if (!(vma->vm_flags & VM_GROWSUP))
76162306a36Sopenharmony_ci				goto out;
76262306a36Sopenharmony_ci		}
76362306a36Sopenharmony_ci	}
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	prev = vma_prev(&vmi);
76662306a36Sopenharmony_ci	if (start > vma->vm_start)
76762306a36Sopenharmony_ci		prev = vma;
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	tlb_gather_mmu(&tlb, current->mm);
77062306a36Sopenharmony_ci	nstart = start;
77162306a36Sopenharmony_ci	tmp = vma->vm_start;
77262306a36Sopenharmony_ci	for_each_vma_range(vmi, vma, end) {
77362306a36Sopenharmony_ci		unsigned long mask_off_old_flags;
77462306a36Sopenharmony_ci		unsigned long newflags;
77562306a36Sopenharmony_ci		int new_vma_pkey;
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci		if (vma->vm_start != tmp) {
77862306a36Sopenharmony_ci			error = -ENOMEM;
77962306a36Sopenharmony_ci			break;
78062306a36Sopenharmony_ci		}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci		/* Does the application expect PROT_READ to imply PROT_EXEC */
78362306a36Sopenharmony_ci		if (rier && (vma->vm_flags & VM_MAYEXEC))
78462306a36Sopenharmony_ci			prot |= PROT_EXEC;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci		/*
78762306a36Sopenharmony_ci		 * Each mprotect() call explicitly passes r/w/x permissions.
78862306a36Sopenharmony_ci		 * If a permission is not passed to mprotect(), it must be
78962306a36Sopenharmony_ci		 * cleared from the VMA.
79062306a36Sopenharmony_ci		 */
79162306a36Sopenharmony_ci		mask_off_old_flags = VM_ACCESS_FLAGS | VM_FLAGS_CLEAR;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci		new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
79462306a36Sopenharmony_ci		newflags = calc_vm_prot_bits(prot, new_vma_pkey);
79562306a36Sopenharmony_ci		newflags |= (vma->vm_flags & ~mask_off_old_flags);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci		/* newflags >> 4 shift VM_MAY% in place of VM_% */
79862306a36Sopenharmony_ci		if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
79962306a36Sopenharmony_ci			error = -EACCES;
80062306a36Sopenharmony_ci			break;
80162306a36Sopenharmony_ci		}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci		if (map_deny_write_exec(vma, newflags)) {
80462306a36Sopenharmony_ci			error = -EACCES;
80562306a36Sopenharmony_ci			break;
80662306a36Sopenharmony_ci		}
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci		/* Allow architectures to sanity-check the new flags */
80962306a36Sopenharmony_ci		if (!arch_validate_flags(newflags)) {
81062306a36Sopenharmony_ci			error = -EINVAL;
81162306a36Sopenharmony_ci			break;
81262306a36Sopenharmony_ci		}
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci		error = security_file_mprotect(vma, reqprot, prot);
81562306a36Sopenharmony_ci		if (error)
81662306a36Sopenharmony_ci			break;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci		tmp = vma->vm_end;
81962306a36Sopenharmony_ci		if (tmp > end)
82062306a36Sopenharmony_ci			tmp = end;
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci		if (vma->vm_ops && vma->vm_ops->mprotect) {
82362306a36Sopenharmony_ci			error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
82462306a36Sopenharmony_ci			if (error)
82562306a36Sopenharmony_ci				break;
82662306a36Sopenharmony_ci		}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci		error = mprotect_fixup(&vmi, &tlb, vma, &prev, nstart, tmp, newflags);
82962306a36Sopenharmony_ci		if (error)
83062306a36Sopenharmony_ci			break;
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci		tmp = vma_iter_end(&vmi);
83362306a36Sopenharmony_ci		nstart = tmp;
83462306a36Sopenharmony_ci		prot = reqprot;
83562306a36Sopenharmony_ci	}
83662306a36Sopenharmony_ci	tlb_finish_mmu(&tlb);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	if (!error && tmp < end)
83962306a36Sopenharmony_ci		error = -ENOMEM;
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ciout:
84262306a36Sopenharmony_ci	mmap_write_unlock(current->mm);
84362306a36Sopenharmony_ci	return error;
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ci
/*
 * mprotect() system call: forwards its arguments unchanged to
 * do_mprotect_pkey(), passing -1 as the pkey (the "legacy mprotect()"
 * value) since this entry point takes no protection key.
 */
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	return do_mprotect_pkey(start, len, prot, -1);
}
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci#ifdef CONFIG_ARCH_HAS_PKEYS
85362306a36Sopenharmony_ci
/*
 * pkey_mprotect() system call: same as mprotect() but with a caller-supplied
 * protection key, which is handed to do_mprotect_pkey() as-is.
 */
SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
		unsigned long, prot, int, pkey)
{
	return do_mprotect_pkey(start, len, prot, pkey);
}
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ciSYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
86162306a36Sopenharmony_ci{
86262306a36Sopenharmony_ci	int pkey;
86362306a36Sopenharmony_ci	int ret;
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	/* No flags supported yet. */
86662306a36Sopenharmony_ci	if (flags)
86762306a36Sopenharmony_ci		return -EINVAL;
86862306a36Sopenharmony_ci	/* check for unsupported init values */
86962306a36Sopenharmony_ci	if (init_val & ~PKEY_ACCESS_MASK)
87062306a36Sopenharmony_ci		return -EINVAL;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	mmap_write_lock(current->mm);
87362306a36Sopenharmony_ci	pkey = mm_pkey_alloc(current->mm);
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	ret = -ENOSPC;
87662306a36Sopenharmony_ci	if (pkey == -1)
87762306a36Sopenharmony_ci		goto out;
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	ret = arch_set_user_pkey_access(current, pkey, init_val);
88062306a36Sopenharmony_ci	if (ret) {
88162306a36Sopenharmony_ci		mm_pkey_free(current->mm, pkey);
88262306a36Sopenharmony_ci		goto out;
88362306a36Sopenharmony_ci	}
88462306a36Sopenharmony_ci	ret = pkey;
88562306a36Sopenharmony_ciout:
88662306a36Sopenharmony_ci	mmap_write_unlock(current->mm);
88762306a36Sopenharmony_ci	return ret;
88862306a36Sopenharmony_ci}
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ciSYSCALL_DEFINE1(pkey_free, int, pkey)
89162306a36Sopenharmony_ci{
89262306a36Sopenharmony_ci	int ret;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	mmap_write_lock(current->mm);
89562306a36Sopenharmony_ci	ret = mm_pkey_free(current->mm, pkey);
89662306a36Sopenharmony_ci	mmap_write_unlock(current->mm);
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	/*
89962306a36Sopenharmony_ci	 * We could provide warnings or errors if any VMA still
90062306a36Sopenharmony_ci	 * has the pkey set here.
90162306a36Sopenharmony_ci	 */
90262306a36Sopenharmony_ci	return ret;
90362306a36Sopenharmony_ci}
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci#endif /* CONFIG_ARCH_HAS_PKEYS */
906