Lines Matching refs:page

33  * 05.04.94  -  Multi-page memory management added for v1.1.
39 * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
97 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
104 struct page *mem_map;
301 * This function frees user-level page tables of a process.
352 * We add page table cache pages with PAGE_SIZE,
418 * Ensure all pte setup (eg. pte page lock and page clearing) are
420 * put into page tables.
422 * The other side of the story is the pointer chasing in the page
423 * table walking code (when walking the page table without locking;
427 * seen in-order. See the alpha page table accessors for the
428 * smp_rmb() barriers in page table walking code.
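
The ordering contract described above (finish all pte-page setup, then publish the entry; lockless walkers rely on data-dependent loads, with alpha needing smp_rmb()) can be illustrated in user space with C11 release/acquire atomics. This is a hedged analogy, not kernel code; pte_page, pmd_slot, setup_side and walker_side are made-up names for the sketch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct pte_page { int initialised; };

static _Atomic(struct pte_page *) pmd_slot;	/* plays the role of a pmd entry */

static void *setup_side(void *arg)
{
	static struct pte_page pg;

	pg.initialised = 1;	/* "pte setup": lock init, page clearing, ... */
	/* release ordering ~ the smp_wmb() before the entry is published */
	atomic_store_explicit(&pmd_slot, &pg, memory_order_release);
	return NULL;
}

static void *walker_side(void *arg)
{
	/* acquire ordering ~ the data-dependent load (smp_rmb() on alpha) */
	struct pte_page *pg = atomic_load_explicit(&pmd_slot, memory_order_acquire);

	if (pg)	/* if the entry is visible, its setup is visible too */
		printf("walker sees initialised=%d\n", pg->initialised);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, setup_side, NULL);
	pthread_create(&b, NULL, walker_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
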
491 pte_t pte, struct page *page)
513 pr_alert("BUG: Bad page map: %lu messages suppressed\n",
525 pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
528 if (page)
529 dump_page(page, "bad pte");
542 * vm_normal_page -- This function gets the "struct page" associated with a pte.
544 * "Special" mappings do not wish to be associated with a "struct page" (either
546 * case, NULL is returned here. "Normal" mappings do have a struct page.
575 * page" backing, however the difference is that _all_ pages with a struct
576 * page (that is, those where pfn_valid is true) are refcounted and considered
583 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
639 * NOTE! We still have PageReserved() pages in the page tables.
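
A hedged, kernel-style sketch (not buildable on its own) of the usual caller pattern around vm_normal_page(): map and lock the PTE, translate it to a page only when the mapping is "normal", and skip special, raw-PFN and zero-page mappings, for which NULL is returned. The helper name demo_inspect_one_pte is hypothetical.

#include <linux/mm.h>
#include <linux/printk.h>

static void demo_inspect_one_pte(struct vm_area_struct *vma, pmd_t *pmd,
				 unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	struct page *page;
	pte_t pte;

	if (!ptep)	/* the pte table may have gone away under us */
		return;
	pte = ptep_get(ptep);
	if (pte_present(pte)) {
		page = vm_normal_page(vma, addr, pte);
		if (page)	/* NULL for special, raw-PFN and zero-page mappings */
			pr_info("normal page, pfn %lu\n", page_to_pfn(page));
	}
	pte_unmap_unlock(ptep, ptl);
}
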
649 struct page *page = vm_normal_page(vma, addr, pte);
651 if (page)
652 return page_folio(page);
657 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
690 * NOTE! We still have PageReserved() pages in the page tables.
699 struct page *page, unsigned long address,
707 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
717 VM_BUG_ON(pte_write(pte) && !(PageAnon(page) && PageAnonExclusive(page)));
720 * No need to take a page reference as one was already
723 if (PageAnon(page))
724 page_add_anon_rmap(page, vma, address, RMAP_NONE);
728 * memory so the entry shouldn't point to a filebacked page.
742 * Tries to restore an exclusive pte if the page lock can be acquired without
750 struct page *page = pfn_swap_entry_to_page(entry);
752 if (trylock_page(page)) {
753 restore_exclusive_pte(vma, page, addr, src_pte);
754 unlock_page(page);
762 * copy one vm_area from one task to the other. Assumes the page tables
775 struct page *page;
797 page = pfn_swap_entry_to_page(entry);
799 rss[mm_counter(page)]++;
818 page = pfn_swap_entry_to_page(entry);
829 get_page(page);
830 rss[mm_counter(page)]++;
832 BUG_ON(page_try_dup_anon_rmap(page, false, src_vma));
876 * Copy a present and normal page.
879 * instead, the caller can just increase the page refcount
882 * And if we need a pre-allocated page but don't yet have
884 * code know so that it can do so outside the page table
890 struct folio **prealloc, struct page *page)
900 * We have a prealloc page, all good! Take it
901 * over and copy the page & arm it.
904 copy_user_highpage(&new_folio->page, page, addr, src_vma);
910 /* All done, just insert the new page copy in the child */
911 pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot);
921 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
932 struct page *page;
935 page = vm_normal_page(src_vma, addr, pte);
936 if (page)
937 folio = page_folio(page);
938 if (page && folio_test_anon(folio)) {
940 * If this page may have been pinned by the parent process,
941 * copy the page immediately for the child so that we'll always
942 * guarantee the pinned page won't be randomly replaced in the
946 if (unlikely(page_try_dup_anon_rmap(page, false, src_vma))) {
950 addr, rss, prealloc, page);
953 } else if (page) {
955 page_dup_file_rmap(page, false);
956 rss[mm_counter_file(page)]++;
967 VM_BUG_ON(page && folio_test_anon(folio) && PageAnonExclusive(page));
1087 * If we need a pre-allocated page for this pte, drop the
1094 * pre-alloc page cannot be reused next time so as
1096 * will allocate page according to address). This
1239 * false when we can speed up fork() by allowing lazy page faults later until
1249 * retrieve from page cache, and skipping the copy would lose that info.
1261 * Don't copy ptes where a page fault will fill them correctly. Fork
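
The fork-time copy logic above leaves anonymous pages shared and write-protected unless they may be pinned, relying on later write faults to break COW. A minimal user-space sketch, assuming Linux/POSIX, makes this visible: after fork(), the child's write gets a private copy and the parent's data is untouched.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	pid_t pid;

	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "parent data");

	pid = fork();
	if (pid == 0) {			/* child: the write breaks COW */
		strcpy(p, "child data");
		printf("child sees:  %s\n", p);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent sees: %s\n", p);	/* still "parent data" */
	munmap(p, 4096);
	return 0;
}
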
1354 /* Decides whether we should zap this page with the page pointer specified */
1355 static inline bool should_zap_page(struct zap_details *details, struct page *page)
1357 /* If we can make a decision without *page.. */
1361 /* E.g. the caller passes NULL for the case of a zero page */
1362 if (!page)
1366 return !PageAnon(page);
1419 struct page *page;
1430 page = vm_normal_page(vma, addr, ptent);
1433 page = NULL;
1435 if (unlikely(!should_zap_page(details, page)))
1443 if (unlikely(!page)) {
1452 if (!PageAnon(page)) {
1454 set_page_dirty(page);
1461 mark_page_accessed(page);
1463 rss[mm_counter(page)]--;
1465 page_remove_rmap(page, vma, false);
1466 if (unlikely(page_mapcount(page) < 0))
1467 print_bad_pte(vma, addr, ptent, page);
1469 if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
1480 page = pfn_swap_entry_to_page(entry);
1481 if (unlikely(!should_zap_page(details, page)))
1485 * work with anonymous pages so far, so we don't need to
1490 rss[mm_counter(page)]--;
1492 page_remove_rmap(page, vma, false);
1493 put_page(page);
1495 /* Genuine swap entry, hence a private anon page */
1502 page = pfn_swap_entry_to_page(entry);
1503 if (!should_zap_page(details, page))
1505 rss[mm_counter(page)]--;
1837 static int validate_page_before_insert(struct page *page)
1839 if (PageAnon(page) || PageSlab(page) || page_has_type(page))
1841 flush_dcache_page(page);
1846 unsigned long addr, struct page *page, pgprot_t prot)
1851 get_page(page);
1852 inc_mm_counter(vma->vm_mm, mm_counter_file(page));
1853 page_add_file_rmap(page, vma, false);
1854 set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
1859 * This is the old fallback for page remapping.
1866 struct page *page, pgprot_t prot)
1872 retval = validate_page_before_insert(page);
1879 retval = insert_page_into_pte_locked(vma, pte, addr, page, prot);
1886 unsigned long addr, struct page *page, pgprot_t prot)
1890 if (!page_count(page))
1892 err = validate_page_before_insert(page);
1895 return insert_page_into_pte_locked(vma, pte, addr, page, prot);
1902 struct page **pages, unsigned long *num, pgprot_t prot)
1975 struct page **pages, unsigned long *num)
1986 /* Defer page refcount checking till we're about to map that page. */
1992 * vm_insert_page - insert single page into user vma
1994 * @addr: target user address of this page
1995 * @page: source kernel page
2000 * The page has to be a nice clean _individual_ kernel allocation.
2001 * If you allocate a compound page, you need to have marked it as
2002 * such (__GFP_COMP), or manually just split the page up yourself
2006 * took an arbitrary page protection parameter. This doesn't allow
2011 * The page does not need to be reserved.
2016 * function from other places, for example from page-fault handler.
2021 struct page *page)
2025 if (!page_count(page))
2032 return insert_page(vma, addr, page, vma->vm_page_prot);
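
A hedged sketch of how a driver typically calls vm_insert_page() from its f_op->mmap() handler, per the note above; this is kernel-style code, not buildable standalone, and demo_page, demo_mmap and demo_fops are hypothetical names. vm_insert_page() takes its own page reference and sets VM_MIXEDMAP on the vma when used this way.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

static struct page *demo_page;	/* assumed: allocated earlier with alloc_page(GFP_KERNEL) */

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	/*
	 * The page must be an individual, non-anon, non-slab kernel page
	 * (validate_page_before_insert()); insert_page() takes a reference
	 * and updates the file rmap and counters for us.
	 */
	return vm_insert_page(vma, vma->vm_start, demo_page);
}

static const struct file_operations demo_fops = {
	.owner = THIS_MODULE,
	.mmap  = demo_mmap,
};
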
2040 * @num: number of pages in page array
2047 static int __vm_map_pages(struct vm_area_struct *vma, struct page **pages,
2076 * @num: number of pages in page array
2081 * If we fail to insert any page into the vma, the function will return
2090 int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
2101 * @num: number of pages in page array
2110 int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
2133 * mapped PFN is a writeable COW page. In the mkwrite
2174 * @addr: target user address of this page
2176 * @pgprot: pgprot flags for the inserted page
2179 * to override pgprot on a per-page basis.
2192 * This is ensured by core vm only modifying these page table entries using
2196 * Also when new page-table entries are created, this is only done using the
2198 * except for page-table entries that point to anonymous pages as the result
2235 * @addr: target user address of this page
2292 * refcount the page if pfn_valid is true (hence insert_page rather
2294 * without pte special, it would then be refcounted as a normal page.
2298 struct page *page;
2305 page = pfn_to_page(pfn_t_to_pfn(pfn));
2306 err = insert_page(vma, addr, page, pgprot);
2458 * raw PFN mappings, and do not have a "struct page" associated
2496 * @addr: target page aligned user address to start at
2497 * @pfn: page frame number of kernel physical memory address
2499 * @prot: page protection flags for this mapping
2544 * You *really* shouldn't map things that aren't page-aligned,
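
For contrast with vm_insert_page(), a hedged sketch of the classic remap_pfn_range() pattern for a raw PFN mapping with no struct page behind it, again as a hypothetical ->mmap() handler; DEMO_PHYS_BASE is a made-up physical base used only for illustration.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

#define DEMO_PHYS_BASE	0x10000000UL	/* hypothetical physical/MMIO base */

static int demo_pfn_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = (DEMO_PHYS_BASE >> PAGE_SHIFT) + vma->vm_pgoff;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	/* remap_pfn_range() marks the vma VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}
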
2761 * Scan a region of virtual memory, filling in page tables as necessary
2762 * and calling a provided function on each leaf page table.
2773 * each leaf page table where it exists.
2775 * Unlike apply_to_page_range, this does _not_ fill in page tables
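
A hedged sketch of a caller of apply_to_page_range(): the pte_fn_t callback is invoked on each leaf pte in the range, and a non-zero return aborts the walk. The helper names are hypothetical, and start/size are assumed to describe a kernel VA range such as a vmalloc area.

#include <linux/mm.h>

/* pte_fn_t callback: invoked for each leaf pte in the range */
static int demo_count_present_pte(pte_t *pte, unsigned long addr, void *data)
{
	unsigned long *count = data;

	if (pte_present(ptep_get(pte)))
		(*count)++;
	return 0;	/* non-zero would abort the walk */
}

static unsigned long demo_count_present(unsigned long start, unsigned long size)
{
	unsigned long count = 0;

	/*
	 * apply_to_page_range() fills in missing page tables on the way down;
	 * apply_to_existing_page_range() only visits levels that already exist.
	 */
	if (apply_to_page_range(&init_mm, start, size,
				demo_count_present_pte, &count))
		return 0;
	return count;
}
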
2786 * handle_pte_fault chooses page fault handler according to an entry which was
2811 * -EHWPOISON: copy failed due to hwpoison in source page
2814 static inline int __wp_page_copy_user(struct page *dst, struct page *src,
2833 * If the source page was a PFN mapping, we don't have
2834 * a "struct page" for it. We do a best-effort copy by
2843 * take a double page fault, so mark it accessed here.
2867 * This really shouldn't fail, because the page is there
2868 * in the page tables. But it might just be unreadable,
2876 /* Re-validate under PTL if the page is still mapped */
2887 * The same page can be mapped back since last copy attempt.
2927 * Notify the address space that the page is about to become writable so that
2928 * it can prohibit this or wait for the page to get into an appropriate state.
2961 * Handle dirtying of a page in shared file mapping on a write fault.
2963 * The function expects the page to be locked and unlocks it.
2969 struct folio *folio = page_folio(vmf->page);
2988 * Throttle page dirtying rate down to writeback speed.
2991 * set page.mapping but still dirty their pages
3011 * Handle write page faults for pages that can be reused in the current vma
3014 * or due to us being the last reference standing to the page. In either
3015 * case, all we need to do here is to mark the page as writable and update
3022 struct page *page = vmf->page;
3026 VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page));
3033 if (page)
3034 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
3046 * Handle the case of a page which we actually need to copy to a new page,
3049 * Called with mmap_lock locked and the old page referenced, but
3054 * - Allocate a page, copy the content of the old page to the new one.
3056 * - Take the PTL. If the pte changed, bail out and release the allocated page
3057 * - If the pte is still the way we remember it, update the page table and all
3058 * relevant references. This includes dropping the reference the page-table
3059 * held to the old page, as well as updating the rmap.
3060 * - In any case, unlock the PTL and drop the reference we took to the old page.
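
The flow above is what a user sees when writing to a private file mapping: the write fault copies the page-cache page into a private anonymous page, and the file itself is untouched. A minimal user-space sketch, assuming Linux/POSIX; cow_demo.txt is a scratch file created and removed by the program.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	char buf[9] = { 0 };
	char *p;
	int fd;

	fd = open("cow_demo.txt", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0 || write(fd, "original", 8) != 8)
		return 1;

	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memcpy(p, "modified", 8);	/* write fault -> private anonymous copy */
	printf("mapping: %.8s\n", p);	/* "modified" */

	if (pread(fd, buf, 8, 0) != 8)
		return 1;
	printf("file:    %s\n", buf);	/* still "original" */

	munmap(p, 4096);
	close(fd);
	unlink("cow_demo.txt");
	return 0;
}
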
3076 if (vmf->page)
3077 old_folio = page_folio(vmf->page);
3091 ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
3107 kmsan_copy_page_meta(&new_folio->page, vmf->page);
3128 dec_mm_counter(mm, mm_counter_file(&old_folio->page));
3136 entry = mk_pte(&new_folio->page, vma->vm_page_prot);
3166 * mmu page tables (such as kvm shadow page tables), we want the
3167 * new page to be mapped directly into the secondary page table.
3174 * Only after switching the pte to the new page may
3177 * before the pte is switched to the new page, and
3178 * "reuse" the old page writing into it while our pte
3189 * no process can access the old page before the
3190 * decremented mapcount is visible. And the old page
3193 * old page will be flushed before it can be reused.
3195 page_remove_rmap(vmf->page, vma, false);
3198 /* Free the old page.. */
3213 free_swap_cache(&old_folio->page);
3230 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3231 * writeable once the page is prepared
3235 * This function handles all that is needed to finish a write page fault in a
3236 * shared mapping due to PTE being read-only once the mapped page is prepared.
3239 * The function expects the page to be locked or other protection against
3253 * We might have raced with another page fault while we released the
3266 * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
3334 * * users try to write to a shared page (FAULT_FLAG_WRITE)
3335 * * GUP wants to take a R/O pin on a possibly shared anonymous page
3338 * It is done by copying the page to a new address and decrementing the
3339 * shared-page counter for the old page.
3342 * done by the caller (the low-level page fault routine in most cases).
3346 * In case of FAULT_FLAG_WRITE, we also mark the page dirty at this point even
3347 * though the page will change only once the write actually happens. This
3376 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
3378 if (vmf->page)
3379 folio = page_folio(vmf->page);
3393 if (!vmf->page)
3399 * Private mapping: create an exclusive anonymous page copy if reuse
3404 * If the page is exclusive to this process we must reuse the
3405 * page without further checks.
3407 if (PageAnonExclusive(vmf->page))
3440 page_move_anon_rmap(vmf->page, vma);
3507 * the page has been remapped again: and then uses unmap_mapping_folio()
3536 * @start: Index of first page to be unmapped.
3542 * a file is being truncated, but not when invalidating pages from the page
3570 * @holebegin: byte in first page to unmap, relative to the start of
3573 * must keep the partial page. In contrast, we must get rid of
3604 struct folio *folio = page_folio(vmf->page);
3633 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte);
3654 * If we want to map a page that's in the swapcache writable, we
3692 * This is actually a page-missing access, but with uffd-wp special pte
3742 struct page *page;
3761 vmf->page = pfn_swap_entry_to_page(entry);
3774 vmf->page = pfn_swap_entry_to_page(entry);
3783 * Get a page reference while we know the page can't be
3786 get_page(vmf->page);
3788 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
3789 put_page(vmf->page);
3808 page = folio_file_page(folio, swp_offset(entry));
3822 /* Relax a bit to prevent rapid repeated page faults */
3831 page = &folio->page;
3852 swap_readpage(page, true, NULL);
3856 page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
3858 if (page)
3859 folio = page_folio(page);
3876 /* Had to read the page from swap area: Major fault */
3880 } else if (PageHWPoison(page)) {
3896 * swapcache from under us. The page pin, and pte_same test
3898 * swapcache, we need to check that the page's swap has not
3902 page_swap_entry(page).val != entry.val))
3907 * page->index of !PageKSM() pages would be nonlinear inside the
3910 page = ksm_might_need_to_copy(page, vma, vmf->address);
3911 if (unlikely(!page)) {
3914 } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
3918 folio = page_folio(page);
3921 * If we want to map a page that's in the swapcache writable, we
3948 * must never point at an anonymous page in the swapcache that is
3950 * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity
3952 * concurrently faulted in this page and set PG_anon_exclusive.
3955 BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page));
3965 * We have a fresh page that is not exposed to the
3973 * concurrent page modifications while under writeback.
3975 * So if we stumble over such a page in the swapcache
3976 * we must not set the page exclusive, otherwise we can
3982 * writeback only if we fully unmapped the page and
3983 * there are no unexpected references on the page after
3994 * Some architectures may have to restore extra metadata to the page
4002 * We're already holding a reference on the page but haven't mapped it
4011 pte = mk_pte(page, vma->vm_page_prot);
4027 flush_icache_page(vma, page);
4036 page_add_new_anon_rmap(page, vma, vmf->address);
4039 page_add_anon_rmap(page, vma, vmf->address, rmap_flags);
4043 (pte_write(pte) && !PageAnonExclusive(page)));
4123 /* use extra page table for userexpte */
4131 /* Use the zero-page for reads */
4150 /* Deliver the page fault to userland, check inside PT lock */
4158 /* Allocate our own private page. */
4171 * preceding stores to the page contents become visible before
4176 entry = mk_pte(&folio->page, vma->vm_page_prot);
4194 /* Deliver the page fault to userland, check inside PT lock */
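
A hedged user-space illustration of the zero-page-for-reads behaviour noted above: reading an untouched private anonymous mapping returns zeroes and, on typical configurations, is backed by the shared zero page, so resident memory barely grows until writes allocate private pages. The numbers printed are indicative only.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static long resident_pages(void)
{
	long size = 0, resident = 0;
	FILE *f = fopen("/proc/self/statm", "r");

	if (f) {
		if (fscanf(f, "%ld %ld", &size, &resident) != 2)
			resident = -1;
		fclose(f);
	}
	return resident;
}

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB */
	unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	unsigned long sum = 0;
	long r0, r1, r2;
	size_t i;

	if (p == MAP_FAILED)
		return 1;

	r0 = resident_pages();
	for (i = 0; i < len; i += 4096)
		sum += p[i];		/* read faults: typically the zero page */
	r1 = resident_pages();
	for (i = 0; i < len; i += 4096)
		p[i] = 1;		/* write faults: private pages are allocated */
	r2 = resident_pages();

	printf("sum=%lu rss(pages): start=%ld after-read=%ld after-write=%ld\n",
	       sum, r0, r1, r2);
	munmap(p, len);
	return 0;
}
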
4268 if (unlikely(PageHWPoison(vmf->page))) {
4269 struct page *page = vmf->page;
4272 if (page_mapped(page))
4273 unmap_mapping_pages(page_mapping(page),
4274 page->index, 1, false);
4275 /* Retry if a clean page was removed from the cache. */
4276 if (invalidate_inode_page(page))
4278 unlock_page(page);
4280 put_page(page);
4281 vmf->page = NULL;
4286 lock_page(vmf->page);
4288 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
4307 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
4318 page = compound_head(page);
4319 if (compound_order(page) != HPAGE_PMD_ORDER)
4324 * the corrupted page may be mapped by PMD silently to escape the
4328 if (unlikely(PageHasHWPoisoned(page)))
4345 flush_icache_pages(vma, page, HPAGE_PMD_NR);
4347 entry = mk_huge_pmd(page, vma->vm_page_prot);
4351 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
4352 page_add_file_rmap(page, vma, true);
4372 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
4381 * @folio: The folio that contains @page.
4382 * @page: The first page to create a PTE for.
4387 struct page *page, unsigned int nr, unsigned long addr)
4395 flush_icache_pages(vma, page, nr);
4396 entry = mk_pte(page, vma->vm_page_prot);
4407 /* copy-on-write page */
4414 add_mm_counter(vma->vm_mm, mm_counter_file(page), nr);
4415 folio_add_file_rmap_range(folio, page, nr, vma, false);
4419 /* no need to invalidate: a not-present page won't be cached */
4432 * finish_fault - finish page fault once we have prepared the page to fault
4436 * This function handles all that is needed to finish a page fault once the
4437 * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
4438 * given page, adds reverse page mapping, handles memcg charges and LRU
4441 * The function expects the page to be locked and on success it consumes a
4442 * reference of a page being mapped (for the PTE which maps it).
4449 struct page *page;
4452 /* Did we COW the page? */
4454 page = vmf->cow_page;
4456 page = vmf->page;
4460 * page
4469 if (PageTransCompound(page)) {
4470 ret = do_set_pmd(vmf, page);
4488 struct folio *folio = page_folio(page);
4490 set_pte_range(vmf, folio, page, 1, vmf->address);
4512 * fault_around_bytes must be rounded down to the nearest page order as it's
4521 * The minimum value is 1 page, however this results in no fault-around
4545 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4548 * This function doesn't cross VMA or page table boundaries, in order to call
4556 * fault_around_pages * PAGE_SIZE rounded down to the machine page size
4557 * (and therefore to page order). This way it's easier to guarantee
4558 * that we don't cross page table boundaries.
4564 /* The page offset of vmf->address within the VMA. */
4602 /* A single page implies no faulting 'around' at all. */
4613 * if the page at that offset is not ready to be mapped (cold cache or
4632 folio = page_folio(vmf->page);
4669 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
4673 unlock_page(vmf->page);
4674 put_page(vmf->page);
4698 folio = page_folio(vmf->page);
4701 * Check if the backing address space wants to know that the page is
4778 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
4781 get_page(page);
4792 return mpol_misplaced(page, vma, addr);
4798 struct page *page = NULL;
4830 page = vm_normal_page(vma, vmf->address, pte);
4831 if (!page || is_zone_device_page(page))
4835 if (PageCompound(page))
4850 * Flag if the page is shared between multiple address spaces. This
4853 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
4856 page_nid = page_to_nid(page);
4858 * For memory tiering mode, cpupid of slow memory page is used
4859 * to record page access time. So use default value.
4865 last_cpupid = page_cpupid_last(page);
4866 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
4869 put_page(page);
4876 if (migrate_misplaced_page(page, vma, target_nid)) {
4995 * PowerPC hashed page tables that act as extended TLBs).
5010 * want to allocate huge page, and if we expose page table
5063 /* Skip spurious TLB flush for retried page fault */
5069 * This still avoids useless tlb flushes for .text page faults
5143 /* Huge pud page fault raced with pmd_alloc? */
5182 * mm_account_fault - Do page fault accounting
5186 * the task who triggered this page fault.
5191 * This will take care of most of the page fault accounting. Meanwhile, it
5194 * still be in per-arch page fault handlers at the entry of page fault.
5217 * reaching here. So this is not a "this many hardware page faults"
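
The per-task counters this accounting updates are visible from user space via getrusage(); a minimal sketch, assuming Linux/POSIX, in which touching a fresh anonymous mapping shows up as minor faults (roughly one per 4 KiB page, fewer if THP kicks in).

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage before, after;
	size_t len = 16UL << 20;	/* 16 MiB */
	char *p;

	getrusage(RUSAGE_SELF, &before);

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xaa, len);	/* roughly len/4096 minor faults (fewer with THP) */

	getrusage(RUSAGE_SELF, &after);
	printf("minor faults: %ld -> %ld, major faults: %ld -> %ld\n",
	       before.ru_minflt, after.ru_minflt,
	       before.ru_majflt, after.ru_majflt);
	munmap(p, len);
	return 0;
}
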
5407 * Helper for page fault handling.
5413 * For example, if we have a kernel bug that causes a page
5537 * Allocate p4d page table.
5560 * Allocate page upper directory.
5583 * Allocate page middle directory.
5736 * not page based.
5816 struct page *page = get_user_page_vma_remote(mm, addr,
5819 if (IS_ERR_OR_NULL(page)) {
5852 maddr = kmap(page);
5854 copy_to_user_page(vma, page, addr,
5856 set_page_dirty_lock(page);
5858 copy_from_user_page(vma, page, addr,
5861 kunmap(page);
5862 put_page(page);
5894 * Do not walk the page table directly, use get_user_pages
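
One user-visible consumer of this remote-access path is /proc/<pid>/mem, whose reads funnel into __access_remote_vm() (get_user_pages plus the kmap/copy seen above). A minimal self-inspection sketch, assuming Linux: it reads a local buffer back through /proc/self/mem, using the buffer's virtual address as the file offset.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char secret[] = "hello from this mapping";
	char out[sizeof(secret)];
	int fd;

	fd = open("/proc/self/mem", O_RDONLY);
	if (fd < 0)
		return 1;
	/* The file offset of /proc/<pid>/mem is the target virtual address. */
	if (pread(fd, out, sizeof(out), (off_t)(uintptr_t)secret) != sizeof(out)) {
		perror("pread");
		return 1;
	}
	printf("read back: %s\n", out);
	close(fd);
	return 0;
}
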
5963 * Process all subpages of the specified huge page with the specified
5980 /* If target subpage in first half of huge page */
5983 /* Process subpages at the end of the huge page */
5991 /* If target subpage in second half of huge page */
5994 /* Process subpages at the beginning of the huge page */
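
The ordering these comments describe (far subpages first, then left-right towards the faulting subpage, target last so its cache lines stay hot) can be traced with a small user-space sketch that mirrors process_huge_page()'s index arithmetic; demo_order() and the sizes used are made up for illustration.

#include <stdio.h>

static void visit(int idx)
{
	printf("%d ", idx);
}

/* Mirrors process_huge_page()'s visiting order for a huge page of
 * pages_per_huge_page subpages faulted at subpage n. */
static void demo_order(int n, int pages_per_huge_page)
{
	int i, base, l;

	if (2 * n <= pages_per_huge_page) {
		/* target in the first half: process the tail end first */
		base = 0;
		l = n;
		for (i = pages_per_huge_page - 1; i >= 2 * n; i--)
			visit(i);
	} else {
		/* target in the second half: process the head first */
		base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
		l = pages_per_huge_page - n;
		for (i = 0; i < base; i++)
			visit(i);
	}
	/* then left-right-left-right towards the target, which comes last */
	for (i = 0; i < l; i++) {
		visit(base + i);
		visit(base + 2 * l - 1 - i);
	}
	printf("\n");
}

int main(void)
{
	demo_order(5, 8);	/* 0 1 2 7 3 6 4 5 */
	demo_order(1, 8);	/* 7 6 5 4 3 2 0 1 */
	return 0;
}
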
6022 static void clear_gigantic_page(struct page *page,
6027 struct page *p;
6031 p = nth_page(page, i);
6039 struct page *page = arg;
6041 clear_user_highpage(page + idx, addr);
6045 void clear_huge_page(struct page *page,
6052 clear_gigantic_page(page, addr, pages_per_huge_page);
6056 process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page);
6065 struct page *dst_page;
6066 struct page *src_page;
6083 struct page *dst;
6084 struct page *src;
6107 .dst = &dst->page,
6108 .src = &src->page,
6127 struct page *subpage;
6157 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,