Lines Matching refs:page

33  * 05.04.94  -  Multi-page memory management added for v1.1.
39 * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
97 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
104 struct page *mem_map;
301 * This function frees user-level page tables of a process.
352 * We add page table cache pages with PAGE_SIZE,
418 * Ensure all pte setup (eg. pte page lock and page clearing) are
420 * put into page tables.
422 * The other side of the story is the pointer chasing in the page
423 * table walking code (when walking the page table without locking;
427 * seen in-order. See the alpha page table accessors for the
428 * smp_rmb() barriers in page table walking code.
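
The ordering contract described above (finish all pte-page setup, then publish the entry; lockless walkers rely on data-dependent loads, with alpha needing smp_rmb()) can be illustrated in user space with C11 release/acquire atomics. This is a hedged analogy, not kernel code; pte_page, pmd_slot, setup_side and walker_side are made-up names for the sketch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct pte_page { int initialised; };

static _Atomic(struct pte_page *) pmd_slot;	/* plays the role of a pmd entry */

static void *setup_side(void *arg)
{
	static struct pte_page pg;

	pg.initialised = 1;	/* "pte setup": lock init, page clearing, ... */
	/* release ordering ~ the smp_wmb() before the entry is published */
	atomic_store_explicit(&pmd_slot, &pg, memory_order_release);
	return NULL;
}

static void *walker_side(void *arg)
{
	/* acquire ordering ~ the data-dependent load (smp_rmb() on alpha) */
	struct pte_page *pg = atomic_load_explicit(&pmd_slot, memory_order_acquire);

	if (pg)	/* if the entry is visible, its setup is visible too */
		printf("walker sees initialised=%d\n", pg->initialised);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, setup_side, NULL);
	pthread_create(&b, NULL, walker_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
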
491 pte_t pte, struct page *page)
513 pr_alert("BUG: Bad page map: %lu messages suppressed\n",
525 pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
528 if (page)
529 dump_page(page, "bad pte");
542 * vm_normal_page -- This function gets the "struct page" associated with a pte.
544 * "Special" mappings do not wish to be associated with a "struct page" (either
546 * case, NULL is returned here. "Normal" mappings do have a struct page.
575 * page" backing, however the difference is that _all_ pages with a struct
576 * page (that is, those where pfn_valid is true) are refcounted and considered
583 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
639 * NOTE! We still have PageReserved() pages in the page tables.
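
A hedged, kernel-style sketch (not buildable on its own) of the usual caller pattern around vm_normal_page(): map and lock the PTE, translate it to a page only when the mapping is "normal", and skip special, raw-PFN and zero-page mappings, for which NULL is returned. The helper name demo_inspect_one_pte is hypothetical.

#include <linux/mm.h>
#include <linux/printk.h>

static void demo_inspect_one_pte(struct vm_area_struct *vma, pmd_t *pmd,
				 unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	struct page *page;
	pte_t pte;

	if (!ptep)	/* the pte table may have gone away under us */
		return;
	pte = ptep_get(ptep);
	if (pte_present(pte)) {
		page = vm_normal_page(vma, addr, pte);
		if (page)	/* NULL for special, raw-PFN and zero-page mappings */
			pr_info("normal page, pfn %lu\n", page_to_pfn(page));
	}
	pte_unmap_unlock(ptep, ptl);
}
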
649 struct page *page = vm_normal_page(vma, addr, pte);
651 if (page)
652 return page_folio(page);
657 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
690 * NOTE! We still have PageReserved() pages in the page tables.
699 struct page *page, unsigned long address,
707 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
717 VM_BUG_ON(pte_write(pte) && !(PageAnon(page) && PageAnonExclusive(page)));
720 * No need to take a page reference as one was already
723 if (PageAnon(page))
724 page_add_anon_rmap(page, vma, address, RMAP_NONE);
728 * memory so the entry shouldn't point to a filebacked page.
742 * Tries to restore an exclusive pte if the page lock can be acquired without
750 struct page *page = pfn_swap_entry_to_page(entry);
752 if (trylock_page(page)) {
753 restore_exclusive_pte(vma, page, addr, src_pte);
754 unlock_page(page);
762 * copy one vm_area from one task to the other. Assumes the page tables
775 struct page *page;
797 page = pfn_swap_entry_to_page(entry);
799 rss[mm_counter(page)]++;
818 page = pfn_swap_entry_to_page(entry);
829 get_page(page);
830 rss[mm_counter(page)]++;
832 BUG_ON(page_try_dup_anon_rmap(page, false, src_vma));
876 * Copy a present and normal page.
879 * instead, the caller can just increase the page refcount
882 * And if we need a pre-allocated page but don't yet have
884 * code know so that it can do so outside the page table
890 struct folio **prealloc, struct page *page)
900 * We have a prealloc page, all good! Take it
901 * over and copy the page & arm it.
904 copy_user_highpage(&new_folio->page, page, addr, src_vma);
910 /* All done, just insert the new page copy in the child */
911 pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot);
921 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
932 struct page *page;
935 page = vm_normal_page(src_vma, addr, pte);
936 if (page)
937 folio = page_folio(page);
938 if (page && folio_test_anon(folio)) {
940 * If this page may have been pinned by the parent process,
941 * copy the page immediately for the child so that we'll always
942 * guarantee the pinned page won't be randomly replaced in the
946 if (unlikely(page_try_dup_anon_rmap(page, false, src_vma))) {
950 addr, rss, prealloc, page);
953 } else if (page) {
955 page_dup_file_rmap(page, false);
956 rss[mm_counter_file(page)]++;
967 VM_BUG_ON(page && folio_test_anon(folio) && PageAnonExclusive(page));
1087 * If we need a pre-allocated page for this pte, drop the
1094 * pre-alloc page cannot be reused next time so as
1096 * will allocate page according to address). This
1239 * false when we can speed up fork() by allowing lazy page faults later until
1249 * retrieve from page cache, and skipping the copy would lose that info.
1261 * Don't copy ptes where a page fault will fill them correctly. Fork
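
The fork-time copy logic above leaves anonymous pages shared and write-protected unless they may be pinned, relying on later write faults to break COW. A minimal user-space sketch, assuming Linux/POSIX, makes this visible: after fork(), the child's write gets a private copy and the parent's data is untouched.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	pid_t pid;

	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "parent data");

	pid = fork();
	if (pid == 0) {			/* child: the write breaks COW */
		strcpy(p, "child data");
		printf("child sees:  %s\n", p);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent sees: %s\n", p);	/* still "parent data" */
	munmap(p, 4096);
	return 0;
}
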
1354 /* Decides whether we should zap this page with the page pointer specified */
1355 static inline bool should_zap_page(struct zap_details *details, struct page *page)
1357 /* If we can make a decision without *page.. */
1361 /* E.g. the caller passes NULL for the case of a zero page */
1362 if (!page)
1366 return !PageAnon(page);
1419 struct page *page;
1430 page = vm_normal_page(vma, addr, ptent);
1433 page = NULL;
1435 if (unlikely(!should_zap_page(details, page)))
1443 if (unlikely(!page)) {
1452 if (!PageAnon(page)) {
1454 set_page_dirty(page);
1461 mark_page_accessed(page);
1463 rss[mm_counter(page)]--;
1465 page_remove_rmap(page, vma, false);
1466 if (unlikely(page_mapcount(page) < 0))
1467 print_bad_pte(vma, addr, ptent, page);
1469 if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
1480 page = pfn_swap_entry_to_page(entry);
1481 if (unlikely(!should_zap_page(details, page)))
1485 * work with anonymous pages so far, so we don't need to
1490 rss[mm_counter(page)]--;
1492 page_remove_rmap(page, vma, false);
1493 put_page(page);
1495 /* Genuine swap entry, hence a private anon page */
1502 page = pfn_swap_entry_to_page(entry);
1503 if (!should_zap_page(details, page))
1505 rss[mm_counter(page)]--;
1837 static int validate_page_before_insert(struct page *page)
1839 if (PageAnon(page) || PageSlab(page) || page_has_type(page))
1841 flush_dcache_page(page);
1846 unsigned long addr, struct page *page, pgprot_t prot)
1851 get_page(page);
1852 inc_mm_counter(vma->vm_mm, mm_counter_file(page));
1853 page_add_file_rmap(page, vma, false);
1854 set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
1859 * This is the old fallback for page remapping.
1866 struct page *page, pgprot_t prot)
1872 retval = validate_page_before_insert(page);
1879 retval = insert_page_into_pte_locked(vma, pte, addr, page, prot);
1886 unsigned long addr, struct page *page, pgprot_t prot)
1890 if (!page_count(page))
1892 err = validate_page_before_insert(page);
1895 return insert_page_into_pte_locked(vma, pte, addr, page, prot);
1902 struct page **pages, unsigned long *num, pgprot_t prot)
1975 struct page **pages, unsigned long *num)
1986 /* Defer page refcount checking till we're about to map that page. */
1992 * vm_insert_page - insert single page into user vma
1994 * @addr: target user address of this page
1995 * @page: source kernel page
2000 * The page has to be a nice clean _individual_ kernel allocation.
2001 * If you allocate a compound page, you need to have marked it as
2002 * such (__GFP_COMP), or manually just split the page up yourself
2006 * took an arbitrary page protection parameter. This doesn't allow
2011 * The page does not need to be reserved.
2016 * function from other places, for example from page-fault handler.
2021 struct page *page)
2025 if (!page_count(page))
2032 return insert_page(vma, addr, page, vma->vm_page_prot);
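
A hedged sketch of how a driver typically calls vm_insert_page() from its f_op->mmap() handler, per the note above; this is kernel-style code, not buildable standalone, and demo_page, demo_mmap and demo_fops are hypothetical names. vm_insert_page() takes its own page reference and sets VM_MIXEDMAP on the vma when used this way.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

static struct page *demo_page;	/* assumed: allocated earlier with alloc_page(GFP_KERNEL) */

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	/*
	 * The page must be an individual, non-anon, non-slab kernel page
	 * (validate_page_before_insert()); insert_page() takes a reference
	 * and updates the file rmap and counters for us.
	 */
	return vm_insert_page(vma, vma->vm_start, demo_page);
}

static const struct file_operations demo_fops = {
	.owner = THIS_MODULE,
	.mmap  = demo_mmap,
};
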
2040 * @num: number of pages in page array
2047 static int __vm_map_pages(struct vm_area_struct *vma, struct page **pages,
2076 * @num: number of pages in page array
2081 * If we fail to insert any page into the vma, the function will return
2090 int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
2101 * @num: number of pages in page array
2110 int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
2133 * mapped PFN is a writeable COW page. In the mkwrite
2174 * @addr: target user address of this page
2176 * @pgprot: pgprot flags for the inserted page
2179 * to override pgprot on a per-page basis.
2192 * This is ensured by core vm only modifying these page table entries using
2196 * Also when new page-table entries are created, this is only done using the
2198 * except for page-table entries that point to anonymous pages as the result
2235 * @addr: target user address of this page
2292 * refcount the page if pfn_valid is true (hence insert_page rather
2294 * without pte special, it would then be refcounted as a normal page.
2298 struct page *page;
2305 page = pfn_to_page(pfn_t_to_pfn(pfn));
2306 err = insert_page(vma, addr, page, pgprot);
2458 * raw PFN mappings, and do not have a "struct page" associated
2496 * @addr: target page aligned user address to start at
2497 * @pfn: page frame number of kernel physical memory address
2499 * @prot: page protection flags for this mapping
2544 * You *really* shouldn't map things that aren't page-aligned,
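
For contrast with vm_insert_page(), a hedged sketch of the classic remap_pfn_range() pattern for a raw PFN mapping with no struct page behind it, again as a hypothetical ->mmap() handler; DEMO_PHYS_BASE is a made-up physical base used only for illustration.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

#define DEMO_PHYS_BASE	0x10000000UL	/* hypothetical physical/MMIO base */

static int demo_pfn_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = (DEMO_PHYS_BASE >> PAGE_SHIFT) + vma->vm_pgoff;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	/* remap_pfn_range() marks the vma VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}
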
2761 * Scan a region of virtual memory, filling in page tables as necessary
2762 * and calling a provided function on each leaf page table.
2773 * each leaf page table where it exists.
2775 * Unlike apply_to_page_range, this does _not_ fill in page tables
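
A hedged sketch of a caller of apply_to_page_range(): the pte_fn_t callback is invoked on each leaf pte in the range, and a non-zero return aborts the walk. The helper names are hypothetical, and start/size are assumed to describe a kernel VA range such as a vmalloc area.

#include <linux/mm.h>

/* pte_fn_t callback: invoked for each leaf pte in the range */
static int demo_count_present_pte(pte_t *pte, unsigned long addr, void *data)
{
	unsigned long *count = data;

	if (pte_present(ptep_get(pte)))
		(*count)++;
	return 0;	/* non-zero would abort the walk */
}

static unsigned long demo_count_present(unsigned long start, unsigned long size)
{
	unsigned long count = 0;

	/*
	 * apply_to_page_range() fills in missing page tables on the way down;
	 * apply_to_existing_page_range() only visits levels that already exist.
	 */
	if (apply_to_page_range(&init_mm, start, size,
				demo_count_present_pte, &count))
		return 0;
	return count;
}
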
2786 * handle_pte_fault chooses page fault handler according to an entry which was
2811 * -EHWPOISON: copy failed due to hwpoison in source page
2814 static inline int __wp_page_copy_user(struct page *dst, struct page *src,
2833 * If the source page was a PFN mapping, we don't have
2834 * a "struct page" for it. We do a best-effort copy by
2843 * take a double page fault, so mark it accessed here.
2867 * This really shouldn't fail, because the page is there
2868 * in the page tables. But it might just be unreadable,
2876 /* Re-validate under PTL if the page is still mapped */
2887 * The same page can be mapped back since last copy attempt.
2927 * Notify the address space that the page is about to become writable so that
2928 * it can prohibit this or wait for the page to get into an appropriate state.
2961 * Handle dirtying of a page in shared file mapping on a write fault.
2963 * The function expects the page to be locked and unlocks it.
2969 struct folio *folio = page_folio(vmf->page);
2988 * Throttle page dirtying rate down to writeback speed.
2991 * set page.mapping but still dirty their pages
3011 * Handle write page faults for pages that can be reused in the current vma
3014 * or due to us being the last reference standing to the page. In either
3015 * case, all we need to do here is to mark the page as writable and update
3022 struct page *page = vmf->page;
3026 VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page));
3033 if (page)
3034 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
3046 * Handle the case of a page which we actually need to copy to a new page,
3049 * Called with mmap_lock locked and the old page referenced, but
3054 * - Allocate a page, copy the content of the old page to the new one.
3056 * - Take the PTL. If the pte changed, bail out and release the allocated page
3057 * - If the pte is still the way we remember it, update the page table and all
3058 * relevant references. This includes dropping the reference the page-table
3059 * held to the old page, as well as updating the rmap.
3060 * - In any case, unlock the PTL and drop the reference we took to the old page.
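
The flow above is what a user sees when writing to a private file mapping: the write fault copies the page-cache page into a private anonymous page, and the file itself is untouched. A minimal user-space sketch, assuming Linux/POSIX; cow_demo.txt is a scratch file created and removed by the program.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	char buf[9] = { 0 };
	char *p;
	int fd;

	fd = open("cow_demo.txt", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0 || write(fd, "original", 8) != 8)
		return 1;

	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memcpy(p, "modified", 8);	/* write fault -> private anonymous copy */
	printf("mapping: %.8s\n", p);	/* "modified" */

	if (pread(fd, buf, 8, 0) != 8)
		return 1;
	printf("file:    %s\n", buf);	/* still "original" */

	munmap(p, 4096);
	close(fd);
	unlink("cow_demo.txt");
	return 0;
}
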
3076 if (vmf->page)
3077 old_folio = page_folio(vmf->page);
3091 ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
3107 kmsan_copy_page_meta(&new_folio->page, vmf->page);
3128 dec_mm_counter(mm, mm_counter_file(&old_folio->page));
3136 entry = mk_pte(&new_folio->page, vma->vm_page_prot);
3166 * mmu page tables (such as kvm shadow page tables), we want the
3167 * new page to be mapped directly into the secondary page table.
3174 * Only after switching the pte to the new page may
3177 * before the pte is switched to the new page, and
3178 * "reuse" the old page writing into it while our pte
3189 * no process can access the old page before the
3190 * decremented mapcount is visible. And the old page
3193 * old page will be flushed before it can be reused.
3195 page_remove_rmap(vmf->page, vma, false);
3198 /* Free the old page.. */
3213 free_swap_cache(&old_folio->page);
3230 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3231 * writeable once the page is prepared
3235 * This function handles all that is needed to finish a write page fault in a
3236 * shared mapping due to PTE being read-only once the mapped page is prepared.
3239 * The function expects the page to be locked or other protection against
3253 * We might have raced with another page fault while we released the
3266 * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
3334 * * users try to write to a shared page (FAULT_FLAG_WRITE)
3335 * * GUP wants to take a R/O pin on a possibly shared anonymous page
3338 * It is done by copying the page to a new address and decrementing the
3339 * shared-page counter for the old page.
3342 * done by the caller (the low-level page fault routine in most cases).
3346 * In case of FAULT_FLAG_WRITE, we also mark the page dirty at this point even
3347 * though the page will change only once the write actually happens. This
3376 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
3378 if (vmf->page)
3379 folio = page_folio(vmf->page);
3393 if (!vmf->page)
3399 * Private mapping: create an exclusive anonymous page copy if reuse
3404 * If the page is exclusive to this process we must reuse the
3405 * page without further checks.
3407 if (PageAnonExclusive(vmf->page))
3440 page_move_anon_rmap(vmf->page, vma);
3507 * the page has been remapped again: and then uses unmap_mapping_folio()
3536 * @start: Index of first page to be unmapped.
3542 * a file is being truncated, but not when invalidating pages from the page
3570 * @holebegin: byte in first page to unmap, relative to the start of
3573 * must keep the partial page. In contrast, we must get rid of
3604 struct folio *folio = page_folio(vmf->page);
3633 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte);
3654 * If we want to map a page that's in the swapcache writable, we
3692 * This is actually a page-missing access, but with uffd-wp special pte
3742 struct page *page;
3761 vmf->page = pfn_swap_entry_to_page(entry);
3774 vmf->page = pfn_swap_entry_to_page(entry);
3783 * Get a page reference while we know the page can't be
3786 get_page(vmf->page);
3788 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
3789 put_page(vmf->page);
3808 page = folio_file_page(folio, swp_offset(entry));
3822 /* Relax a bit to prevent rapid repeated page faults */
3831 page = &folio->page;
3852 swap_readpage(page, true, NULL);
3856 page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
3858 if (page)
3859 folio = page_folio(page);
3876 /* Had to read the page from swap area: Major fault */
3880 } else if (PageHWPoison(page)) {
3896 * swapcache from under us. The page pin, and pte_same test
3898 * swapcache, we need to check that the page's swap has not
3902 page_swap_entry(page).val != entry.val))
3907 * page->index of !PageKSM() pages would be nonlinear inside the
3910 page = ksm_might_need_to_copy(page, vma, vmf->address);
3911 if (unlikely(!page)) {
3914 } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
3918 folio = page_folio(page);
3921 * If we want to map a page that's in the swapcache writable, we
3948 * must never point at an anonymous page in the swapcache that is
3950 * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity
3952 * concurrently faulted in this page and set PG_anon_exclusive.
3955 BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page));
3965 * We have a fresh page that is not exposed to the
3973 * concurrent page modifications while under writeback.
3975 * So if we stumble over such a page in the swapcache
3976 * we must not set the page exclusive, otherwise we can
3982 * writeback only if we fully unmapped the page and
3983 * there are no unexpected references on the page after
3994 * Some architectures may have to restore extra metadata to the page
4002 * We're already holding a reference on the page but haven't mapped it
4011 pte = mk_pte(page, vma->vm_page_prot);
4027 flush_icache_page(vma, page);
4036 page_add_new_anon_rmap(page, vma, vmf->address);
4039 page_add_anon_rmap(page, vma, vmf->address, rmap_flags);
4043 (pte_write(pte) && !PageAnonExclusive(page)));
4123 /* use extra page table for userexpte */
4131 /* Use the zero-page for reads */
4150 /* Deliver the page fault to userland, check inside PT lock */
4158 /* Allocate our own private page. */
4171 * preceding stores to the page contents become visible before
4176 entry = mk_pte(&folio->page, vma->vm_page_prot);
4194 /* Deliver the page fault to userland, check inside PT lock */
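
A hedged user-space illustration of the zero-page-for-reads behaviour noted above: reading an untouched private anonymous mapping returns zeroes and, on typical configurations, is backed by the shared zero page, so resident memory barely grows until writes allocate private pages. The numbers printed are indicative only.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static long resident_pages(void)
{
	long size = 0, resident = 0;
	FILE *f = fopen("/proc/self/statm", "r");

	if (f) {
		if (fscanf(f, "%ld %ld", &size, &resident) != 2)
			resident = -1;
		fclose(f);
	}
	return resident;
}

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB */
	unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	unsigned long sum = 0;
	long r0, r1, r2;
	size_t i;

	if (p == MAP_FAILED)
		return 1;

	r0 = resident_pages();
	for (i = 0; i < len; i += 4096)
		sum += p[i];		/* read faults: typically the zero page */
	r1 = resident_pages();
	for (i = 0; i < len; i += 4096)
		p[i] = 1;		/* write faults: private pages are allocated */
	r2 = resident_pages();

	printf("sum=%lu rss(pages): start=%ld after-read=%ld after-write=%ld\n",
	       sum, r0, r1, r2);
	munmap(p, len);
	return 0;
}
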
4268 if (unlikely(PageHWPoison(vmf->page))) {
4269 struct page *page = vmf->page;
4272 if (page_mapped(page))
4273 unmap_mapping_pages(page_mapping(page),
4274 page->index, 1, false);
4275 /* Retry if a clean page was removed from the cache. */
4276 if (invalidate_inode_page(page))
4278 unlock_page(page);
4280 put_page(page);
4281 vmf->page = NULL;
4286 lock_page(vmf->page);
4288 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
4307 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
4318 page = compound_head(page);
4319 if (compound_order(page) != HPAGE_PMD_ORDER)
4324 * the corrupted page may be mapped by PMD silently to escape the
4328 if (unlikely(PageHasHWPoisoned(page)))
4345 flush_icache_pages(vma, page, HPAGE_PMD_NR);
4347 entry = mk_huge_pmd(page, vma->vm_page_prot);
4351 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
4352 page_add_file_rmap(page, vma, true);
4372 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
4381 * @folio: The folio that contains @page.
4382 * @page: The first page to create a PTE for.
4387 struct page *page, unsigned int nr, unsigned long addr)
4395 flush_icache_pages(vma, page, nr);
4396 entry = mk_pte(page, vma->vm_page_prot);
4407 /* copy-on-write page */
4414 add_mm_counter(vma->vm_mm, mm_counter_file(page), nr);
4415 folio_add_file_rmap_range(folio, page, nr, vma, false);
4419 /* no need to invalidate: a not-present page won't be cached */
4432 * finish_fault - finish page fault once we have prepared the page to fault
4436 * This function handles all that is needed to finish a page fault once the
4437 * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
4438 * given page, adds reverse page mapping, handles memcg charges and LRU
4441 * The function expects the page to be locked and on success it consumes a
4442 * reference of a page being mapped (for the PTE which maps it).
4449 struct page *page;
4452 /* Did we COW the page? */
4454 page = vmf->cow_page;
4456 page = vmf->page;
4460 * page
4469 if (PageTransCompound(page)) {
4470 ret = do_set_pmd(vmf, page);
4488 struct folio *folio = page_folio(page);
4490 set_pte_range(vmf, folio, page, 1, vmf->address);
4512 * fault_around_bytes must be rounded down to the nearest page order as it's
4521 * The minimum value is 1 page, however this results in no fault-around
4545 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4548 * This function doesn't cross VMA or page table boundaries, in order to call
4556 * fault_around_pages * PAGE_SIZE rounded down to the machine page size
4557 * (and therefore to page order). This way it's easier to guarantee
4558 * that we don't cross page table boundaries.
4564 /* The page offset of vmf->address within the VMA. */
4602 /* A single page implies no faulting 'around' at all. */
4613 * if the page at that offset is not ready to be mapped (cold cache or
4632 folio = page_folio(vmf->page);
4669 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
4673 unlock_page(vmf->page);
4674 put_page(vmf->page);
4698 folio = page_folio(vmf->page);
4701 * Check if the backing address space wants to know that the page is
4778 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
4781 get_page(page);
4792 return mpol_misplaced(page, vma, addr);
4798 struct page *page = NULL;
4830 page = vm_normal_page(vma, vmf->address, pte);
4831 if (!page || is_zone_device_page(page))
4835 if (PageCompound(page))
4850 * Flag if the page is shared between multiple address spaces. This
4853 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
4856 page_nid = page_to_nid(page);
4858 * For memory tiering mode, cpupid of slow memory page is used
4859 * to record page access time. So use default value.
4865 last_cpupid = page_cpupid_last(page);
4866 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
4869 put_page(page);
4876 if (migrate_misplaced_page(page, vma, target_nid)) {
4995 * PowerPC hashed page tables that act as extended TLBs).
5010 * want to allocate huge page, and if we expose page table
5063 /* Skip spurious TLB flush for retried page fault */
5069 * This still avoids useless tlb flushes for .text page faults
5143 /* Huge pud page fault raced with pmd_alloc? */
5182 * mm_account_fault - Do page fault accounting
5186 * the task who triggered this page fault.
5191 * This will take care of most of the page fault accounting. Meanwhile, it
5194 * still be in per-arch page fault handlers at the entry of page fault.
5217 * reaching here. So this is not a "this many hardware page faults"
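
The per-task counters this accounting updates are visible from user space via getrusage(); a minimal sketch, assuming Linux/POSIX, in which touching a fresh anonymous mapping shows up as minor faults (roughly one per 4 KiB page, fewer if THP kicks in).

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage before, after;
	size_t len = 16UL << 20;	/* 16 MiB */
	char *p;

	getrusage(RUSAGE_SELF, &before);

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xaa, len);	/* roughly len/4096 minor faults (fewer with THP) */

	getrusage(RUSAGE_SELF, &after);
	printf("minor faults: %ld -> %ld, major faults: %ld -> %ld\n",
	       before.ru_minflt, after.ru_minflt,
	       before.ru_majflt, after.ru_majflt);
	munmap(p, len);
	return 0;
}
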
5407 * Helper for page fault handling.
5413 * For example, if we have a kernel bug that causes a page
5537 * Allocate p4d page table.
5560 * Allocate page upper directory.
5583 * Allocate page middle directory.
5736 * not page based.
5816 struct page *page = get_user_page_vma_remote(mm, addr,
5819 if (IS_ERR_OR_NULL(page)) {
5852 maddr = kmap(page);
5854 copy_to_user_page(vma, page, addr,
5856 set_page_dirty_lock(page);
5858 copy_from_user_page(vma, page, addr,
5861 kunmap(page);
5862 put_page(page);
5894 * Do not walk the page table directly, use get_user_pages
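
One user-visible consumer of this remote-access path is /proc/<pid>/mem, whose reads funnel into __access_remote_vm() (get_user_pages plus the kmap/copy seen above). A minimal self-inspection sketch, assuming Linux: it reads a local buffer back through /proc/self/mem, using the buffer's virtual address as the file offset.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char secret[] = "hello from this mapping";
	char out[sizeof(secret)];
	int fd;

	fd = open("/proc/self/mem", O_RDONLY);
	if (fd < 0)
		return 1;
	/* The file offset of /proc/<pid>/mem is the target virtual address. */
	if (pread(fd, out, sizeof(out), (off_t)(uintptr_t)secret) != sizeof(out)) {
		perror("pread");
		return 1;
	}
	printf("read back: %s\n", out);
	close(fd);
	return 0;
}
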
5963 * Process all subpages of the specified huge page with the specified
5980 /* If target subpage in first half of huge page */
5983 /* Process subpages at the end of the huge page */
5991 /* If target subpage in second half of huge page */
5994 /* Process subpages at the beginning of the huge page */
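
The ordering these comments describe (far subpages first, then left-right towards the faulting subpage, target last so its cache lines stay hot) can be traced with a small user-space sketch that mirrors process_huge_page()'s index arithmetic; demo_order() and the sizes used are made up for illustration.

#include <stdio.h>

static void visit(int idx)
{
	printf("%d ", idx);
}

/* Mirrors process_huge_page()'s visiting order for a huge page of
 * pages_per_huge_page subpages faulted at subpage n. */
static void demo_order(int n, int pages_per_huge_page)
{
	int i, base, l;

	if (2 * n <= pages_per_huge_page) {
		/* target in the first half: process the tail end first */
		base = 0;
		l = n;
		for (i = pages_per_huge_page - 1; i >= 2 * n; i--)
			visit(i);
	} else {
		/* target in the second half: process the head first */
		base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
		l = pages_per_huge_page - n;
		for (i = 0; i < base; i++)
			visit(i);
	}
	/* then left-right-left-right towards the target, which comes last */
	for (i = 0; i < l; i++) {
		visit(base + i);
		visit(base + 2 * l - 1 - i);
	}
	printf("\n");
}

int main(void)
{
	demo_order(5, 8);	/* 0 1 2 7 3 6 4 5 */
	demo_order(1, 8);	/* 7 6 5 4 3 2 0 1 */
	return 0;
}
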
6022 static void clear_gigantic_page(struct page *page,
6027 struct page *p;
6031 p = nth_page(page, i);
6039 struct page *page = arg;
6041 clear_user_highpage(page + idx, addr);
6045 void clear_huge_page(struct page *page,
6052 clear_gigantic_page(page, addr, pages_per_huge_page);
6056 process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page);
6065 struct page *dst_page;
6066 struct page *src_page;
6083 struct page *dst;
6084 struct page *src;
6107 .dst = &dst->page,
6108 .src = &src->page,
6127 struct page *subpage;
6157 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,