162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */ 262306a36Sopenharmony_ci/* internal.h: mm/ internal definitions 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 562306a36Sopenharmony_ci * Written by David Howells (dhowells@redhat.com) 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#ifndef __MM_INTERNAL_H 862306a36Sopenharmony_ci#define __MM_INTERNAL_H 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/fs.h> 1162306a36Sopenharmony_ci#include <linux/mm.h> 1262306a36Sopenharmony_ci#include <linux/pagemap.h> 1362306a36Sopenharmony_ci#include <linux/swap.h> 1462306a36Sopenharmony_ci#include <linux/rmap.h> 1562306a36Sopenharmony_ci#include <linux/tracepoint-defs.h> 1662306a36Sopenharmony_ci#include <linux/types.h> 1762306a36Sopenharmony_ci#include <linux/reclaim_acct.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_cistruct folio_batch; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci/* 2262306a36Sopenharmony_ci * The set of flags that only affect watermark checking and reclaim 2362306a36Sopenharmony_ci * behaviour. This is used by the MM to obey the caller constraints 2462306a36Sopenharmony_ci * about IO, FS and watermark checking while ignoring placement 2562306a36Sopenharmony_ci * hints such as HIGHMEM usage. 2662306a36Sopenharmony_ci */ 2762306a36Sopenharmony_ci#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ 2862306a36Sopenharmony_ci __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ 2962306a36Sopenharmony_ci __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ 3062306a36Sopenharmony_ci __GFP_NOLOCKDEP) 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci/* The GFP flags allowed during early boot */ 3362306a36Sopenharmony_ci#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* Control allocation cpuset and node placement constraints */ 3662306a36Sopenharmony_ci#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci/* Do not use these with a slab allocator */ 3962306a36Sopenharmony_ci#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cienum reclaim_invoker { 4262306a36Sopenharmony_ci ALL, 4362306a36Sopenharmony_ci KSWAPD, 4462306a36Sopenharmony_ci ZSWAPD, 4562306a36Sopenharmony_ci DIRECT_RECLAIM, 4662306a36Sopenharmony_ci NODE_RECLAIM, 4762306a36Sopenharmony_ci SOFT_LIMIT, 4862306a36Sopenharmony_ci RCC_RECLAIM, 4962306a36Sopenharmony_ci FILE_RECLAIM, 5062306a36Sopenharmony_ci ANON_RECLAIM 5162306a36Sopenharmony_ci}; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistruct scan_control { 5462306a36Sopenharmony_ci /* How many pages shrink_list() should reclaim */ 5562306a36Sopenharmony_ci unsigned long nr_to_reclaim; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci /* 5862306a36Sopenharmony_ci * Nodemask of nodes allowed by the caller. If NULL, all nodes 5962306a36Sopenharmony_ci * are scanned. 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_ci nodemask_t *nodemask; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci /* 6462306a36Sopenharmony_ci * The memory cgroup that hit its limit and as a result is the 6562306a36Sopenharmony_ci * primary target of this reclaim invocation. 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_ci struct mem_cgroup *target_mem_cgroup; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci /* 7062306a36Sopenharmony_ci * Scan pressure balancing between anon and file LRUs 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci unsigned long anon_cost; 7362306a36Sopenharmony_ci unsigned long file_cost; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci /* Can active folios be deactivated as part of reclaim? */ 7662306a36Sopenharmony_ci#define DEACTIVATE_ANON 1 7762306a36Sopenharmony_ci#define DEACTIVATE_FILE 2 7862306a36Sopenharmony_ci unsigned int may_deactivate:2; 7962306a36Sopenharmony_ci unsigned int force_deactivate:1; 8062306a36Sopenharmony_ci unsigned int skipped_deactivate:1; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci /* Writepage batching in laptop mode; RECLAIM_WRITE */ 8362306a36Sopenharmony_ci unsigned int may_writepage:1; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci /* Can mapped folios be reclaimed? */ 8662306a36Sopenharmony_ci unsigned int may_unmap:1; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci /* Can folios be swapped as part of reclaim? */ 8962306a36Sopenharmony_ci unsigned int may_swap:1; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci /* Proactive reclaim invoked by userspace through memory.reclaim */ 9262306a36Sopenharmony_ci unsigned int proactive:1; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci /* 9562306a36Sopenharmony_ci * Cgroup memory below memory.low is protected as long as we 9662306a36Sopenharmony_ci * don't threaten to OOM. If any cgroup is reclaimed at 9762306a36Sopenharmony_ci * reduced force or passed over entirely due to its memory.low 9862306a36Sopenharmony_ci * setting (memcg_low_skipped), and nothing is reclaimed as a 9962306a36Sopenharmony_ci * result, then go back for one more cycle that reclaims the protected 10062306a36Sopenharmony_ci * memory (memcg_low_reclaim) to avert OOM. 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_ci unsigned int memcg_low_reclaim:1; 10362306a36Sopenharmony_ci unsigned int memcg_low_skipped:1; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci unsigned int hibernation_mode:1; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci /* One of the zones is ready for compaction */ 10862306a36Sopenharmony_ci unsigned int compaction_ready:1; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci /* There is easily reclaimable cold cache in the current node */ 11162306a36Sopenharmony_ci unsigned int cache_trim_mode:1; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci /* The file folios on the current node are dangerously low */ 11462306a36Sopenharmony_ci unsigned int file_is_tiny:1; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci /* Always discard instead of demoting to lower tier memory */ 11762306a36Sopenharmony_ci unsigned int no_demotion:1; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci /* Allocation order */ 12062306a36Sopenharmony_ci s8 order; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* Scan (total_size >> priority) pages at once */ 12362306a36Sopenharmony_ci s8 priority; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci /* The highest zone to isolate folios for reclaim from */ 12662306a36Sopenharmony_ci s8 reclaim_idx; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci /* This context's GFP mask */ 12962306a36Sopenharmony_ci gfp_t gfp_mask; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci /* Incremented by the number of inactive pages that were scanned */ 13262306a36Sopenharmony_ci unsigned long nr_scanned; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci /* Number of pages freed so far during a call to shrink_zones() */ 13562306a36Sopenharmony_ci unsigned long nr_reclaimed; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci struct { 13862306a36Sopenharmony_ci unsigned int dirty; 13962306a36Sopenharmony_ci unsigned int unqueued_dirty; 14062306a36Sopenharmony_ci unsigned int congested; 14162306a36Sopenharmony_ci unsigned int writeback; 14262306a36Sopenharmony_ci unsigned int immediate; 14362306a36Sopenharmony_ci unsigned int file_taken; 14462306a36Sopenharmony_ci unsigned int taken; 14562306a36Sopenharmony_ci } nr; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci enum reclaim_invoker invoker; 14862306a36Sopenharmony_ci u32 isolate_count; 14962306a36Sopenharmony_ci unsigned long nr_scanned_anon; 15062306a36Sopenharmony_ci unsigned long nr_scanned_file; 15162306a36Sopenharmony_ci unsigned long nr_reclaimed_anon; 15262306a36Sopenharmony_ci unsigned long nr_reclaimed_file; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci /* for recording the reclaimed slab by now */ 15562306a36Sopenharmony_ci struct reclaim_state reclaim_state; 15662306a36Sopenharmony_ci}; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_cienum scan_balance { 15962306a36Sopenharmony_ci SCAN_EQUAL, 16062306a36Sopenharmony_ci SCAN_FRACT, 16162306a36Sopenharmony_ci SCAN_ANON, 16262306a36Sopenharmony_ci SCAN_FILE, 16362306a36Sopenharmony_ci}; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci/* 16662306a36Sopenharmony_ci * Different from WARN_ON_ONCE(), no warning will be issued 16762306a36Sopenharmony_ci * when we specify __GFP_NOWARN. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci#define WARN_ON_ONCE_GFP(cond, gfp) ({ \ 17062306a36Sopenharmony_ci static bool __section(".data.once") __warned; \ 17162306a36Sopenharmony_ci int __ret_warn_once = !!(cond); \ 17262306a36Sopenharmony_ci \ 17362306a36Sopenharmony_ci if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ 17462306a36Sopenharmony_ci __warned = true; \ 17562306a36Sopenharmony_ci WARN_ON(1); \ 17662306a36Sopenharmony_ci } \ 17762306a36Sopenharmony_ci unlikely(__ret_warn_once); \ 17862306a36Sopenharmony_ci}) 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_civoid page_writeback_init(void); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci/* 18362306a36Sopenharmony_ci * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages, 18462306a36Sopenharmony_ci * its nr_pages_mapped would be 0x400000: choose the COMPOUND_MAPPED bit 18562306a36Sopenharmony_ci * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently 18662306a36Sopenharmony_ci * leaves nr_pages_mapped at 0, but avoid surprise if it participates later. 18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_ci#define COMPOUND_MAPPED 0x800000 18962306a36Sopenharmony_ci#define FOLIO_PAGES_MAPPED (COMPOUND_MAPPED - 1) 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci/* 19262306a36Sopenharmony_ci * Flags passed to __show_mem() and show_free_areas() to suppress output in 19362306a36Sopenharmony_ci * various contexts. 19462306a36Sopenharmony_ci */ 19562306a36Sopenharmony_ci#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci/* 19862306a36Sopenharmony_ci * How many individual pages have an elevated _mapcount. Excludes 19962306a36Sopenharmony_ci * the folio's entire_mapcount. 20062306a36Sopenharmony_ci */ 20162306a36Sopenharmony_cistatic inline int folio_nr_pages_mapped(struct folio *folio) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic inline void *folio_raw_mapping(struct folio *folio) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci unsigned long mapping = (unsigned long)folio->mapping; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci return (void *)(mapping & ~PAGE_MAPPING_FLAGS); 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_civoid __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, 21462306a36Sopenharmony_ci int nr_throttled); 21562306a36Sopenharmony_cistatic inline void acct_reclaim_writeback(struct folio *folio) 21662306a36Sopenharmony_ci{ 21762306a36Sopenharmony_ci pg_data_t *pgdat = folio_pgdat(folio); 21862306a36Sopenharmony_ci int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci if (nr_throttled) 22162306a36Sopenharmony_ci __acct_reclaim_writeback(pgdat, folio, nr_throttled); 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic inline void wake_throttle_isolated(pg_data_t *pgdat) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci wait_queue_head_t *wqh; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; 22962306a36Sopenharmony_ci if (waitqueue_active(wqh)) 23062306a36Sopenharmony_ci wake_up(wqh); 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_civm_fault_t do_swap_page(struct vm_fault *vmf); 23462306a36Sopenharmony_civoid folio_rotate_reclaimable(struct folio *folio); 23562306a36Sopenharmony_cibool __folio_end_writeback(struct folio *folio); 23662306a36Sopenharmony_civoid deactivate_file_folio(struct folio *folio); 23762306a36Sopenharmony_civoid folio_activate(struct folio *folio); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_civoid free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, 24062306a36Sopenharmony_ci struct vm_area_struct *start_vma, unsigned long floor, 24162306a36Sopenharmony_ci unsigned long ceiling, bool mm_wr_locked); 24262306a36Sopenharmony_civoid pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_cistruct zap_details; 24562306a36Sopenharmony_civoid unmap_page_range(struct mmu_gather *tlb, 24662306a36Sopenharmony_ci struct vm_area_struct *vma, 24762306a36Sopenharmony_ci unsigned long addr, unsigned long end, 24862306a36Sopenharmony_ci struct zap_details *details); 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_civoid page_cache_ra_order(struct readahead_control *, struct file_ra_state *, 25162306a36Sopenharmony_ci unsigned int order); 25262306a36Sopenharmony_civoid force_page_cache_ra(struct readahead_control *, unsigned long nr); 25362306a36Sopenharmony_cistatic inline void force_page_cache_readahead(struct address_space *mapping, 25462306a36Sopenharmony_ci struct file *file, pgoff_t index, unsigned long nr_to_read) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index); 25762306a36Sopenharmony_ci force_page_cache_ra(&ractl, nr_to_read); 25862306a36Sopenharmony_ci} 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ciunsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, 26162306a36Sopenharmony_ci pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 26262306a36Sopenharmony_ciunsigned find_get_entries(struct address_space *mapping, pgoff_t *start, 26362306a36Sopenharmony_ci pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 26462306a36Sopenharmony_civoid filemap_free_folio(struct address_space *mapping, struct folio *folio); 26562306a36Sopenharmony_ciint truncate_inode_folio(struct address_space *mapping, struct folio *folio); 26662306a36Sopenharmony_cibool truncate_inode_partial_folio(struct folio *folio, loff_t start, 26762306a36Sopenharmony_ci loff_t end); 26862306a36Sopenharmony_cilong invalidate_inode_page(struct page *page); 26962306a36Sopenharmony_ciunsigned long mapping_try_invalidate(struct address_space *mapping, 27062306a36Sopenharmony_ci pgoff_t start, pgoff_t end, unsigned long *nr_failed); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci/** 27362306a36Sopenharmony_ci * folio_evictable - Test whether a folio is evictable. 27462306a36Sopenharmony_ci * @folio: The folio to test. 27562306a36Sopenharmony_ci * 27662306a36Sopenharmony_ci * Test whether @folio is evictable -- i.e., should be placed on 27762306a36Sopenharmony_ci * active/inactive lists vs unevictable list. 27862306a36Sopenharmony_ci * 27962306a36Sopenharmony_ci * Reasons folio might not be evictable: 28062306a36Sopenharmony_ci * 1. folio's mapping marked unevictable 28162306a36Sopenharmony_ci * 2. One of the pages in the folio is part of an mlocked VMA 28262306a36Sopenharmony_ci */ 28362306a36Sopenharmony_cistatic inline bool folio_evictable(struct folio *folio) 28462306a36Sopenharmony_ci{ 28562306a36Sopenharmony_ci bool ret; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci /* Prevent address_space of inode and swap cache from being freed */ 28862306a36Sopenharmony_ci rcu_read_lock(); 28962306a36Sopenharmony_ci ret = !mapping_unevictable(folio_mapping(folio)) && 29062306a36Sopenharmony_ci !folio_test_mlocked(folio); 29162306a36Sopenharmony_ci rcu_read_unlock(); 29262306a36Sopenharmony_ci return ret; 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci/* 29662306a36Sopenharmony_ci * Turn a non-refcounted page (->_refcount == 0) into refcounted with 29762306a36Sopenharmony_ci * a count of one. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_cistatic inline void set_page_refcounted(struct page *page) 30062306a36Sopenharmony_ci{ 30162306a36Sopenharmony_ci VM_BUG_ON_PAGE(PageTail(page), page); 30262306a36Sopenharmony_ci VM_BUG_ON_PAGE(page_ref_count(page), page); 30362306a36Sopenharmony_ci set_page_count(page, 1); 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci/* 30762306a36Sopenharmony_ci * Return true if a folio needs ->release_folio() calling upon it. 30862306a36Sopenharmony_ci */ 30962306a36Sopenharmony_cistatic inline bool folio_needs_release(struct folio *folio) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci struct address_space *mapping = folio_mapping(folio); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci return folio_has_private(folio) || 31462306a36Sopenharmony_ci (mapping && mapping_release_always(mapping)); 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ciextern unsigned long highest_memmap_pfn; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci/* 32062306a36Sopenharmony_ci * Maximum number of reclaim retries without progress before the OOM 32162306a36Sopenharmony_ci * killer is consider the only way forward. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci#define MAX_RECLAIM_RETRIES 16 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci/* 32662306a36Sopenharmony_ci * in mm/vmscan.c: 32762306a36Sopenharmony_ci */ 32862306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_MONITOR 32962306a36Sopenharmony_ciextern void kswapd_monitor_wake_up_queue(void); 33062306a36Sopenharmony_ci#endif 33162306a36Sopenharmony_cibool isolate_lru_page(struct page *page); 33262306a36Sopenharmony_cibool folio_isolate_lru(struct folio *folio); 33362306a36Sopenharmony_civoid putback_lru_page(struct page *page); 33462306a36Sopenharmony_civoid folio_putback_lru(struct folio *folio); 33562306a36Sopenharmony_ciextern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); 33662306a36Sopenharmony_ciextern unsigned int shrink_folio_list(struct list_head *page_list, struct pglist_data *pgdat, 33762306a36Sopenharmony_ci struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references); 33862306a36Sopenharmony_ciextern unsigned long isolate_lru_folios(unsigned long nr_to_scan, struct lruvec *lruvec, 33962306a36Sopenharmony_ci struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, 34062306a36Sopenharmony_ci enum lru_list lru); 34162306a36Sopenharmony_ciextern unsigned move_folios_to_lru(struct lruvec *lruvec, struct list_head *list); 34262306a36Sopenharmony_ciextern void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, 34362306a36Sopenharmony_ci struct scan_control *sc, enum lru_list lru); 34462306a36Sopenharmony_ciextern unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, 34562306a36Sopenharmony_ci struct scan_control *sc, enum lru_list lru); 34662306a36Sopenharmony_ciextern void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci/* 34962306a36Sopenharmony_ci * in mm/rmap.c: 35062306a36Sopenharmony_ci */ 35162306a36Sopenharmony_cipmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci/* 35462306a36Sopenharmony_ci * in mm/page_alloc.c 35562306a36Sopenharmony_ci */ 35662306a36Sopenharmony_ci#define K(x) ((x) << (PAGE_SHIFT-10)) 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ciextern char * const zone_names[MAX_NR_ZONES]; 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci/* perform sanity checks on struct pages being allocated or freed */ 36162306a36Sopenharmony_ciDECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ciextern int min_free_kbytes; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_civoid setup_per_zone_wmarks(void); 36662306a36Sopenharmony_civoid calculate_min_free_kbytes(void); 36762306a36Sopenharmony_ciint __meminit init_per_zone_wmark_min(void); 36862306a36Sopenharmony_civoid page_alloc_sysctl_init(void); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci/* 37162306a36Sopenharmony_ci * Structure for holding the mostly immutable allocation parameters passed 37262306a36Sopenharmony_ci * between functions involved in allocations, including the alloc_pages* 37362306a36Sopenharmony_ci * family of functions. 37462306a36Sopenharmony_ci * 37562306a36Sopenharmony_ci * nodemask, migratetype and highest_zoneidx are initialized only once in 37662306a36Sopenharmony_ci * __alloc_pages() and then never change. 37762306a36Sopenharmony_ci * 37862306a36Sopenharmony_ci * zonelist, preferred_zone and highest_zoneidx are set first in 37962306a36Sopenharmony_ci * __alloc_pages() for the fast path, and might be later changed 38062306a36Sopenharmony_ci * in __alloc_pages_slowpath(). All other functions pass the whole structure 38162306a36Sopenharmony_ci * by a const pointer. 38262306a36Sopenharmony_ci */ 38362306a36Sopenharmony_cistruct alloc_context { 38462306a36Sopenharmony_ci struct zonelist *zonelist; 38562306a36Sopenharmony_ci nodemask_t *nodemask; 38662306a36Sopenharmony_ci struct zoneref *preferred_zoneref; 38762306a36Sopenharmony_ci int migratetype; 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci /* 39062306a36Sopenharmony_ci * highest_zoneidx represents highest usable zone index of 39162306a36Sopenharmony_ci * the allocation request. Due to the nature of the zone, 39262306a36Sopenharmony_ci * memory on lower zone than the highest_zoneidx will be 39362306a36Sopenharmony_ci * protected by lowmem_reserve[highest_zoneidx]. 39462306a36Sopenharmony_ci * 39562306a36Sopenharmony_ci * highest_zoneidx is also used by reclaim/compaction to limit 39662306a36Sopenharmony_ci * the target zone since higher zone than this index cannot be 39762306a36Sopenharmony_ci * usable for this allocation request. 39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_ci enum zone_type highest_zoneidx; 40062306a36Sopenharmony_ci bool spread_dirty_pages; 40162306a36Sopenharmony_ci}; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci/* 40462306a36Sopenharmony_ci * This function returns the order of a free page in the buddy system. In 40562306a36Sopenharmony_ci * general, page_zone(page)->lock must be held by the caller to prevent the 40662306a36Sopenharmony_ci * page from being allocated in parallel and returning garbage as the order. 40762306a36Sopenharmony_ci * If a caller does not hold page_zone(page)->lock, it must guarantee that the 40862306a36Sopenharmony_ci * page cannot be allocated or merged in parallel. Alternatively, it must 40962306a36Sopenharmony_ci * handle invalid values gracefully, and use buddy_order_unsafe() below. 41062306a36Sopenharmony_ci */ 41162306a36Sopenharmony_cistatic inline unsigned int buddy_order(struct page *page) 41262306a36Sopenharmony_ci{ 41362306a36Sopenharmony_ci /* PageBuddy() must be checked by the caller */ 41462306a36Sopenharmony_ci return page_private(page); 41562306a36Sopenharmony_ci} 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci/* 41862306a36Sopenharmony_ci * Like buddy_order(), but for callers who cannot afford to hold the zone lock. 41962306a36Sopenharmony_ci * PageBuddy() should be checked first by the caller to minimize race window, 42062306a36Sopenharmony_ci * and invalid values must be handled gracefully. 42162306a36Sopenharmony_ci * 42262306a36Sopenharmony_ci * READ_ONCE is used so that if the caller assigns the result into a local 42362306a36Sopenharmony_ci * variable and e.g. tests it for valid range before using, the compiler cannot 42462306a36Sopenharmony_ci * decide to remove the variable and inline the page_private(page) multiple 42562306a36Sopenharmony_ci * times, potentially observing different values in the tests and the actual 42662306a36Sopenharmony_ci * use of the result. 42762306a36Sopenharmony_ci */ 42862306a36Sopenharmony_ci#define buddy_order_unsafe(page) READ_ONCE(page_private(page)) 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/* 43162306a36Sopenharmony_ci * This function checks whether a page is free && is the buddy 43262306a36Sopenharmony_ci * we can coalesce a page and its buddy if 43362306a36Sopenharmony_ci * (a) the buddy is not in a hole (check before calling!) && 43462306a36Sopenharmony_ci * (b) the buddy is in the buddy system && 43562306a36Sopenharmony_ci * (c) a page and its buddy have the same order && 43662306a36Sopenharmony_ci * (d) a page and its buddy are in the same zone. 43762306a36Sopenharmony_ci * 43862306a36Sopenharmony_ci * For recording whether a page is in the buddy system, we set PageBuddy. 43962306a36Sopenharmony_ci * Setting, clearing, and testing PageBuddy is serialized by zone->lock. 44062306a36Sopenharmony_ci * 44162306a36Sopenharmony_ci * For recording page's order, we use page_private(page). 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_cistatic inline bool page_is_buddy(struct page *page, struct page *buddy, 44462306a36Sopenharmony_ci unsigned int order) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci if (!page_is_guard(buddy) && !PageBuddy(buddy)) 44762306a36Sopenharmony_ci return false; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci if (buddy_order(buddy) != order) 45062306a36Sopenharmony_ci return false; 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci /* 45362306a36Sopenharmony_ci * zone check is done late to avoid uselessly calculating 45462306a36Sopenharmony_ci * zone/node ids for pages that could never merge. 45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_ci if (page_zone_id(page) != page_zone_id(buddy)) 45762306a36Sopenharmony_ci return false; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci return true; 46262306a36Sopenharmony_ci} 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci/* 46562306a36Sopenharmony_ci * Locate the struct page for both the matching buddy in our 46662306a36Sopenharmony_ci * pair (buddy1) and the combined O(n+1) page they form (page). 46762306a36Sopenharmony_ci * 46862306a36Sopenharmony_ci * 1) Any buddy B1 will have an order O twin B2 which satisfies 46962306a36Sopenharmony_ci * the following equation: 47062306a36Sopenharmony_ci * B2 = B1 ^ (1 << O) 47162306a36Sopenharmony_ci * For example, if the starting buddy (buddy2) is #8 its order 47262306a36Sopenharmony_ci * 1 buddy is #10: 47362306a36Sopenharmony_ci * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 47462306a36Sopenharmony_ci * 47562306a36Sopenharmony_ci * 2) Any buddy B will have an order O+1 parent P which 47662306a36Sopenharmony_ci * satisfies the following equation: 47762306a36Sopenharmony_ci * P = B & ~(1 << O) 47862306a36Sopenharmony_ci * 47962306a36Sopenharmony_ci * Assumption: *_mem_map is contiguous at least up to MAX_ORDER 48062306a36Sopenharmony_ci */ 48162306a36Sopenharmony_cistatic inline unsigned long 48262306a36Sopenharmony_ci__find_buddy_pfn(unsigned long page_pfn, unsigned int order) 48362306a36Sopenharmony_ci{ 48462306a36Sopenharmony_ci return page_pfn ^ (1 << order); 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci/* 48862306a36Sopenharmony_ci * Find the buddy of @page and validate it. 48962306a36Sopenharmony_ci * @page: The input page 49062306a36Sopenharmony_ci * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the 49162306a36Sopenharmony_ci * function is used in the performance-critical __free_one_page(). 49262306a36Sopenharmony_ci * @order: The order of the page 49362306a36Sopenharmony_ci * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to 49462306a36Sopenharmony_ci * page_to_pfn(). 49562306a36Sopenharmony_ci * 49662306a36Sopenharmony_ci * The found buddy can be a non PageBuddy, out of @page's zone, or its order is 49762306a36Sopenharmony_ci * not the same as @page. The validation is necessary before use it. 49862306a36Sopenharmony_ci * 49962306a36Sopenharmony_ci * Return: the found buddy page or NULL if not found. 50062306a36Sopenharmony_ci */ 50162306a36Sopenharmony_cistatic inline struct page *find_buddy_page_pfn(struct page *page, 50262306a36Sopenharmony_ci unsigned long pfn, unsigned int order, unsigned long *buddy_pfn) 50362306a36Sopenharmony_ci{ 50462306a36Sopenharmony_ci unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order); 50562306a36Sopenharmony_ci struct page *buddy; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci buddy = page + (__buddy_pfn - pfn); 50862306a36Sopenharmony_ci if (buddy_pfn) 50962306a36Sopenharmony_ci *buddy_pfn = __buddy_pfn; 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci if (page_is_buddy(page, buddy, order)) 51262306a36Sopenharmony_ci return buddy; 51362306a36Sopenharmony_ci return NULL; 51462306a36Sopenharmony_ci} 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ciextern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, 51762306a36Sopenharmony_ci unsigned long end_pfn, struct zone *zone); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_cistatic inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, 52062306a36Sopenharmony_ci unsigned long end_pfn, struct zone *zone) 52162306a36Sopenharmony_ci{ 52262306a36Sopenharmony_ci if (zone->contiguous) 52362306a36Sopenharmony_ci return pfn_to_page(start_pfn); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); 52662306a36Sopenharmony_ci} 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_civoid set_zone_contiguous(struct zone *zone); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_cistatic inline void clear_zone_contiguous(struct zone *zone) 53162306a36Sopenharmony_ci{ 53262306a36Sopenharmony_ci zone->contiguous = false; 53362306a36Sopenharmony_ci} 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ciextern int __isolate_free_page(struct page *page, unsigned int order); 53662306a36Sopenharmony_ciextern void __putback_isolated_page(struct page *page, unsigned int order, 53762306a36Sopenharmony_ci int mt); 53862306a36Sopenharmony_ciextern void memblock_free_pages(struct page *page, unsigned long pfn, 53962306a36Sopenharmony_ci unsigned int order); 54062306a36Sopenharmony_ciextern void __free_pages_core(struct page *page, unsigned int order); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci/* 54362306a36Sopenharmony_ci * This will have no effect, other than possibly generating a warning, if the 54462306a36Sopenharmony_ci * caller passes in a non-large folio. 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_cistatic inline void folio_set_order(struct folio *folio, unsigned int order) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci if (WARN_ON_ONCE(!order || !folio_test_large(folio))) 54962306a36Sopenharmony_ci return; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order; 55262306a36Sopenharmony_ci#ifdef CONFIG_64BIT 55362306a36Sopenharmony_ci folio->_folio_nr_pages = 1U << order; 55462306a36Sopenharmony_ci#endif 55562306a36Sopenharmony_ci} 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_civoid folio_undo_large_rmappable(struct folio *folio); 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_cistatic inline void prep_compound_head(struct page *page, unsigned int order) 56062306a36Sopenharmony_ci{ 56162306a36Sopenharmony_ci struct folio *folio = (struct folio *)page; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci folio_set_order(folio, order); 56462306a36Sopenharmony_ci atomic_set(&folio->_entire_mapcount, -1); 56562306a36Sopenharmony_ci atomic_set(&folio->_nr_pages_mapped, 0); 56662306a36Sopenharmony_ci atomic_set(&folio->_pincount, 0); 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic inline void prep_compound_tail(struct page *head, int tail_idx) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci struct page *p = head + tail_idx; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci p->mapping = TAIL_MAPPING; 57462306a36Sopenharmony_ci set_compound_head(p, head); 57562306a36Sopenharmony_ci set_page_private(p, 0); 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ciextern void prep_compound_page(struct page *page, unsigned int order); 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ciextern void post_alloc_hook(struct page *page, unsigned int order, 58162306a36Sopenharmony_ci gfp_t gfp_flags); 58262306a36Sopenharmony_ciextern int user_min_free_kbytes; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ciextern void free_unref_page(struct page *page, unsigned int order); 58562306a36Sopenharmony_ciextern void free_unref_page_list(struct list_head *list); 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ciextern void zone_pcp_reset(struct zone *zone); 58862306a36Sopenharmony_ciextern void zone_pcp_disable(struct zone *zone); 58962306a36Sopenharmony_ciextern void zone_pcp_enable(struct zone *zone); 59062306a36Sopenharmony_ciextern void zone_pcp_init(struct zone *zone); 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ciextern void *memmap_alloc(phys_addr_t size, phys_addr_t align, 59362306a36Sopenharmony_ci phys_addr_t min_addr, 59462306a36Sopenharmony_ci int nid, bool exact_nid); 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_civoid memmap_init_range(unsigned long, int, unsigned long, unsigned long, 59762306a36Sopenharmony_ci unsigned long, enum meminit_context, struct vmem_altmap *, int); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ciint split_free_page(struct page *free_page, 60162306a36Sopenharmony_ci unsigned int order, unsigned long split_pfn_offset); 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci#if defined CONFIG_COMPACTION || defined CONFIG_CMA 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci/* 60662306a36Sopenharmony_ci * in mm/compaction.c 60762306a36Sopenharmony_ci */ 60862306a36Sopenharmony_ci/* 60962306a36Sopenharmony_ci * compact_control is used to track pages being migrated and the free pages 61062306a36Sopenharmony_ci * they are being migrated to during memory compaction. The free_pfn starts 61162306a36Sopenharmony_ci * at the end of a zone and migrate_pfn begins at the start. Movable pages 61262306a36Sopenharmony_ci * are moved to the end of a zone during a compaction run and the run 61362306a36Sopenharmony_ci * completes when free_pfn <= migrate_pfn 61462306a36Sopenharmony_ci */ 61562306a36Sopenharmony_cistruct compact_control { 61662306a36Sopenharmony_ci struct list_head freepages; /* List of free pages to migrate to */ 61762306a36Sopenharmony_ci struct list_head migratepages; /* List of pages being migrated */ 61862306a36Sopenharmony_ci unsigned int nr_freepages; /* Number of isolated free pages */ 61962306a36Sopenharmony_ci unsigned int nr_migratepages; /* Number of pages to migrate */ 62062306a36Sopenharmony_ci unsigned long free_pfn; /* isolate_freepages search base */ 62162306a36Sopenharmony_ci /* 62262306a36Sopenharmony_ci * Acts as an in/out parameter to page isolation for migration. 62362306a36Sopenharmony_ci * isolate_migratepages uses it as a search base. 62462306a36Sopenharmony_ci * isolate_migratepages_block will update the value to the next pfn 62562306a36Sopenharmony_ci * after the last isolated one. 62662306a36Sopenharmony_ci */ 62762306a36Sopenharmony_ci unsigned long migrate_pfn; 62862306a36Sopenharmony_ci unsigned long fast_start_pfn; /* a pfn to start linear scan from */ 62962306a36Sopenharmony_ci struct zone *zone; 63062306a36Sopenharmony_ci unsigned long total_migrate_scanned; 63162306a36Sopenharmony_ci unsigned long total_free_scanned; 63262306a36Sopenharmony_ci unsigned short fast_search_fail;/* failures to use free list searches */ 63362306a36Sopenharmony_ci short search_order; /* order to start a fast search at */ 63462306a36Sopenharmony_ci const gfp_t gfp_mask; /* gfp mask of a direct compactor */ 63562306a36Sopenharmony_ci int order; /* order a direct compactor needs */ 63662306a36Sopenharmony_ci int migratetype; /* migratetype of direct compactor */ 63762306a36Sopenharmony_ci const unsigned int alloc_flags; /* alloc flags of a direct compactor */ 63862306a36Sopenharmony_ci const int highest_zoneidx; /* zone index of a direct compactor */ 63962306a36Sopenharmony_ci enum migrate_mode mode; /* Async or sync migration mode */ 64062306a36Sopenharmony_ci bool ignore_skip_hint; /* Scan blocks even if marked skip */ 64162306a36Sopenharmony_ci bool no_set_skip_hint; /* Don't mark blocks for skipping */ 64262306a36Sopenharmony_ci bool ignore_block_suitable; /* Scan blocks considered unsuitable */ 64362306a36Sopenharmony_ci bool direct_compaction; /* False from kcompactd or /proc/... */ 64462306a36Sopenharmony_ci bool proactive_compaction; /* kcompactd proactive compaction */ 64562306a36Sopenharmony_ci bool whole_zone; /* Whole zone should/has been scanned */ 64662306a36Sopenharmony_ci bool contended; /* Signal lock contention */ 64762306a36Sopenharmony_ci bool finish_pageblock; /* Scan the remainder of a pageblock. Used 64862306a36Sopenharmony_ci * when there are potentially transient 64962306a36Sopenharmony_ci * isolation or migration failures to 65062306a36Sopenharmony_ci * ensure forward progress. 65162306a36Sopenharmony_ci */ 65262306a36Sopenharmony_ci bool alloc_contig; /* alloc_contig_range allocation */ 65362306a36Sopenharmony_ci}; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci/* 65662306a36Sopenharmony_ci * Used in direct compaction when a page should be taken from the freelists 65762306a36Sopenharmony_ci * immediately when one is created during the free path. 65862306a36Sopenharmony_ci */ 65962306a36Sopenharmony_cistruct capture_control { 66062306a36Sopenharmony_ci struct compact_control *cc; 66162306a36Sopenharmony_ci struct page *page; 66262306a36Sopenharmony_ci}; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ciunsigned long 66562306a36Sopenharmony_ciisolate_freepages_range(struct compact_control *cc, 66662306a36Sopenharmony_ci unsigned long start_pfn, unsigned long end_pfn); 66762306a36Sopenharmony_ciint 66862306a36Sopenharmony_ciisolate_migratepages_range(struct compact_control *cc, 66962306a36Sopenharmony_ci unsigned long low_pfn, unsigned long end_pfn); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ciint __alloc_contig_migrate_range(struct compact_control *cc, 67262306a36Sopenharmony_ci unsigned long start, unsigned long end); 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci/* Free whole pageblock and set its migration type to MIGRATE_CMA. */ 67562306a36Sopenharmony_civoid init_cma_reserved_pageblock(struct page *page); 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ciint find_suitable_fallback(struct free_area *area, unsigned int order, 68062306a36Sopenharmony_ci int migratetype, bool only_stealable, bool *can_steal); 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_cistatic inline bool free_area_empty(struct free_area *area, int migratetype) 68362306a36Sopenharmony_ci{ 68462306a36Sopenharmony_ci return list_empty(&area->free_list[migratetype]); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci/* 68862306a36Sopenharmony_ci * These three helpers classifies VMAs for virtual memory accounting. 68962306a36Sopenharmony_ci */ 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci/* 69262306a36Sopenharmony_ci * Executable code area - executable, not writable, not stack 69362306a36Sopenharmony_ci */ 69462306a36Sopenharmony_cistatic inline bool is_exec_mapping(vm_flags_t flags) 69562306a36Sopenharmony_ci{ 69662306a36Sopenharmony_ci return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; 69762306a36Sopenharmony_ci} 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci/* 70062306a36Sopenharmony_ci * Stack area (including shadow stacks) 70162306a36Sopenharmony_ci * 70262306a36Sopenharmony_ci * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: 70362306a36Sopenharmony_ci * do_mmap() forbids all other combinations. 70462306a36Sopenharmony_ci */ 70562306a36Sopenharmony_cistatic inline bool is_stack_mapping(vm_flags_t flags) 70662306a36Sopenharmony_ci{ 70762306a36Sopenharmony_ci return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK); 70862306a36Sopenharmony_ci} 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci/* 71162306a36Sopenharmony_ci * Data area - private, writable, not stack 71262306a36Sopenharmony_ci */ 71362306a36Sopenharmony_cistatic inline bool is_data_mapping(vm_flags_t flags) 71462306a36Sopenharmony_ci{ 71562306a36Sopenharmony_ci return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; 71662306a36Sopenharmony_ci} 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci/* mm/util.c */ 71962306a36Sopenharmony_cistruct anon_vma *folio_anon_vma(struct folio *folio); 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci#ifdef CONFIG_MMU 72262306a36Sopenharmony_civoid unmap_mapping_folio(struct folio *folio); 72362306a36Sopenharmony_ciextern long populate_vma_page_range(struct vm_area_struct *vma, 72462306a36Sopenharmony_ci unsigned long start, unsigned long end, int *locked); 72562306a36Sopenharmony_ciextern long faultin_vma_page_range(struct vm_area_struct *vma, 72662306a36Sopenharmony_ci unsigned long start, unsigned long end, 72762306a36Sopenharmony_ci bool write, int *locked); 72862306a36Sopenharmony_ciextern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, 72962306a36Sopenharmony_ci unsigned long bytes); 73062306a36Sopenharmony_ci/* 73162306a36Sopenharmony_ci * mlock_vma_folio() and munlock_vma_folio(): 73262306a36Sopenharmony_ci * should be called with vma's mmap_lock held for read or write, 73362306a36Sopenharmony_ci * under page table lock for the pte/pmd being added or removed. 73462306a36Sopenharmony_ci * 73562306a36Sopenharmony_ci * mlock is usually called at the end of page_add_*_rmap(), munlock at 73662306a36Sopenharmony_ci * the end of page_remove_rmap(); but new anon folios are managed by 73762306a36Sopenharmony_ci * folio_add_lru_vma() calling mlock_new_folio(). 73862306a36Sopenharmony_ci * 73962306a36Sopenharmony_ci * @compound is used to include pmd mappings of THPs, but filter out 74062306a36Sopenharmony_ci * pte mappings of THPs, which cannot be consistently counted: a pte 74162306a36Sopenharmony_ci * mapping of the THP head cannot be distinguished by the page alone. 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_civoid mlock_folio(struct folio *folio); 74462306a36Sopenharmony_cistatic inline void mlock_vma_folio(struct folio *folio, 74562306a36Sopenharmony_ci struct vm_area_struct *vma, bool compound) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * The VM_SPECIAL check here serves two purposes. 74962306a36Sopenharmony_ci * 1) VM_IO check prevents migration from double-counting during mlock. 75062306a36Sopenharmony_ci * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED 75162306a36Sopenharmony_ci * is never left set on a VM_SPECIAL vma, there is an interval while 75262306a36Sopenharmony_ci * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may 75362306a36Sopenharmony_ci * still be set while VM_SPECIAL bits are added: so ignore it then. 75462306a36Sopenharmony_ci */ 75562306a36Sopenharmony_ci if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) && 75662306a36Sopenharmony_ci (compound || !folio_test_large(folio))) 75762306a36Sopenharmony_ci mlock_folio(folio); 75862306a36Sopenharmony_ci} 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_civoid munlock_folio(struct folio *folio); 76162306a36Sopenharmony_cistatic inline void munlock_vma_folio(struct folio *folio, 76262306a36Sopenharmony_ci struct vm_area_struct *vma, bool compound) 76362306a36Sopenharmony_ci{ 76462306a36Sopenharmony_ci if (unlikely(vma->vm_flags & VM_LOCKED) && 76562306a36Sopenharmony_ci (compound || !folio_test_large(folio))) 76662306a36Sopenharmony_ci munlock_folio(folio); 76762306a36Sopenharmony_ci} 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_civoid mlock_new_folio(struct folio *folio); 77062306a36Sopenharmony_cibool need_mlock_drain(int cpu); 77162306a36Sopenharmony_civoid mlock_drain_local(void); 77262306a36Sopenharmony_civoid mlock_drain_remote(int cpu); 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ciextern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci/* 77762306a36Sopenharmony_ci * Return the start of user virtual address at the specific offset within 77862306a36Sopenharmony_ci * a vma. 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_cistatic inline unsigned long 78162306a36Sopenharmony_civma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages, 78262306a36Sopenharmony_ci struct vm_area_struct *vma) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci unsigned long address; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci if (pgoff >= vma->vm_pgoff) { 78762306a36Sopenharmony_ci address = vma->vm_start + 78862306a36Sopenharmony_ci ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 78962306a36Sopenharmony_ci /* Check for address beyond vma (or wrapped through 0?) */ 79062306a36Sopenharmony_ci if (address < vma->vm_start || address >= vma->vm_end) 79162306a36Sopenharmony_ci address = -EFAULT; 79262306a36Sopenharmony_ci } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) { 79362306a36Sopenharmony_ci /* Test above avoids possibility of wrap to 0 on 32-bit */ 79462306a36Sopenharmony_ci address = vma->vm_start; 79562306a36Sopenharmony_ci } else { 79662306a36Sopenharmony_ci address = -EFAULT; 79762306a36Sopenharmony_ci } 79862306a36Sopenharmony_ci return address; 79962306a36Sopenharmony_ci} 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci/* 80262306a36Sopenharmony_ci * Return the start of user virtual address of a page within a vma. 80362306a36Sopenharmony_ci * Returns -EFAULT if all of the page is outside the range of vma. 80462306a36Sopenharmony_ci * If page is a compound head, the entire compound page is considered. 80562306a36Sopenharmony_ci */ 80662306a36Sopenharmony_cistatic inline unsigned long 80762306a36Sopenharmony_civma_address(struct page *page, struct vm_area_struct *vma) 80862306a36Sopenharmony_ci{ 80962306a36Sopenharmony_ci VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ 81062306a36Sopenharmony_ci return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma); 81162306a36Sopenharmony_ci} 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci/* 81462306a36Sopenharmony_ci * Then at what user virtual address will none of the range be found in vma? 81562306a36Sopenharmony_ci * Assumes that vma_address() already returned a good starting address. 81662306a36Sopenharmony_ci */ 81762306a36Sopenharmony_cistatic inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) 81862306a36Sopenharmony_ci{ 81962306a36Sopenharmony_ci struct vm_area_struct *vma = pvmw->vma; 82062306a36Sopenharmony_ci pgoff_t pgoff; 82162306a36Sopenharmony_ci unsigned long address; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci /* Common case, plus ->pgoff is invalid for KSM */ 82462306a36Sopenharmony_ci if (pvmw->nr_pages == 1) 82562306a36Sopenharmony_ci return pvmw->address + PAGE_SIZE; 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci pgoff = pvmw->pgoff + pvmw->nr_pages; 82862306a36Sopenharmony_ci address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 82962306a36Sopenharmony_ci /* Check for address beyond vma (or wrapped through 0?) */ 83062306a36Sopenharmony_ci if (address < vma->vm_start || address > vma->vm_end) 83162306a36Sopenharmony_ci address = vma->vm_end; 83262306a36Sopenharmony_ci return address; 83362306a36Sopenharmony_ci} 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_cistatic inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, 83662306a36Sopenharmony_ci struct file *fpin) 83762306a36Sopenharmony_ci{ 83862306a36Sopenharmony_ci int flags = vmf->flags; 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci if (fpin) 84162306a36Sopenharmony_ci return fpin; 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci /* 84462306a36Sopenharmony_ci * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or 84562306a36Sopenharmony_ci * anything, so we only pin the file and drop the mmap_lock if only 84662306a36Sopenharmony_ci * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. 84762306a36Sopenharmony_ci */ 84862306a36Sopenharmony_ci if (fault_flag_allow_retry_first(flags) && 84962306a36Sopenharmony_ci !(flags & FAULT_FLAG_RETRY_NOWAIT)) { 85062306a36Sopenharmony_ci fpin = get_file(vmf->vma->vm_file); 85162306a36Sopenharmony_ci release_fault_lock(vmf); 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci return fpin; 85462306a36Sopenharmony_ci} 85562306a36Sopenharmony_ci#else /* !CONFIG_MMU */ 85662306a36Sopenharmony_cistatic inline void unmap_mapping_folio(struct folio *folio) { } 85762306a36Sopenharmony_cistatic inline void mlock_new_folio(struct folio *folio) { } 85862306a36Sopenharmony_cistatic inline bool need_mlock_drain(int cpu) { return false; } 85962306a36Sopenharmony_cistatic inline void mlock_drain_local(void) { } 86062306a36Sopenharmony_cistatic inline void mlock_drain_remote(int cpu) { } 86162306a36Sopenharmony_cistatic inline void vunmap_range_noflush(unsigned long start, unsigned long end) 86262306a36Sopenharmony_ci{ 86362306a36Sopenharmony_ci} 86462306a36Sopenharmony_ci#endif /* !CONFIG_MMU */ 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci/* Memory initialisation debug and verification */ 86762306a36Sopenharmony_ci#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 86862306a36Sopenharmony_ciDECLARE_STATIC_KEY_TRUE(deferred_pages); 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_cibool __init deferred_grow_zone(struct zone *zone, unsigned int order); 87162306a36Sopenharmony_ci#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_cienum mminit_level { 87462306a36Sopenharmony_ci MMINIT_WARNING, 87562306a36Sopenharmony_ci MMINIT_VERIFY, 87662306a36Sopenharmony_ci MMINIT_TRACE 87762306a36Sopenharmony_ci}; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_MEMORY_INIT 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ciextern int mminit_loglevel; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci#define mminit_dprintk(level, prefix, fmt, arg...) \ 88462306a36Sopenharmony_cido { \ 88562306a36Sopenharmony_ci if (level < mminit_loglevel) { \ 88662306a36Sopenharmony_ci if (level <= MMINIT_WARNING) \ 88762306a36Sopenharmony_ci pr_warn("mminit::" prefix " " fmt, ##arg); \ 88862306a36Sopenharmony_ci else \ 88962306a36Sopenharmony_ci printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ 89062306a36Sopenharmony_ci } \ 89162306a36Sopenharmony_ci} while (0) 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ciextern void mminit_verify_pageflags_layout(void); 89462306a36Sopenharmony_ciextern void mminit_verify_zonelist(void); 89562306a36Sopenharmony_ci#else 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_cistatic inline void mminit_dprintk(enum mminit_level level, 89862306a36Sopenharmony_ci const char *prefix, const char *fmt, ...) 89962306a36Sopenharmony_ci{ 90062306a36Sopenharmony_ci} 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_cistatic inline void mminit_verify_pageflags_layout(void) 90362306a36Sopenharmony_ci{ 90462306a36Sopenharmony_ci} 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_cistatic inline void mminit_verify_zonelist(void) 90762306a36Sopenharmony_ci{ 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci#endif /* CONFIG_DEBUG_MEMORY_INIT */ 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci#define NODE_RECLAIM_NOSCAN -2 91262306a36Sopenharmony_ci#define NODE_RECLAIM_FULL -1 91362306a36Sopenharmony_ci#define NODE_RECLAIM_SOME 0 91462306a36Sopenharmony_ci#define NODE_RECLAIM_SUCCESS 1 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci#ifdef CONFIG_NUMA 91762306a36Sopenharmony_ciextern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); 91862306a36Sopenharmony_ciextern int find_next_best_node(int node, nodemask_t *used_node_mask); 91962306a36Sopenharmony_ci#else 92062306a36Sopenharmony_cistatic inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, 92162306a36Sopenharmony_ci unsigned int order) 92262306a36Sopenharmony_ci{ 92362306a36Sopenharmony_ci return NODE_RECLAIM_NOSCAN; 92462306a36Sopenharmony_ci} 92562306a36Sopenharmony_cistatic inline int find_next_best_node(int node, nodemask_t *used_node_mask) 92662306a36Sopenharmony_ci{ 92762306a36Sopenharmony_ci return NUMA_NO_NODE; 92862306a36Sopenharmony_ci} 92962306a36Sopenharmony_ci#endif 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci/* 93262306a36Sopenharmony_ci * mm/memory-failure.c 93362306a36Sopenharmony_ci */ 93462306a36Sopenharmony_ciextern int hwpoison_filter(struct page *p); 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ciextern u32 hwpoison_filter_dev_major; 93762306a36Sopenharmony_ciextern u32 hwpoison_filter_dev_minor; 93862306a36Sopenharmony_ciextern u64 hwpoison_filter_flags_mask; 93962306a36Sopenharmony_ciextern u64 hwpoison_filter_flags_value; 94062306a36Sopenharmony_ciextern u64 hwpoison_filter_memcg; 94162306a36Sopenharmony_ciextern u32 hwpoison_filter_enable; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ciextern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, 94462306a36Sopenharmony_ci unsigned long, unsigned long, 94562306a36Sopenharmony_ci unsigned long, unsigned long); 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ciextern void set_pageblock_order(void); 94862306a36Sopenharmony_ciunsigned long reclaim_pages(struct list_head *folio_list); 94962306a36Sopenharmony_ciunsigned int reclaim_clean_pages_from_list(struct zone *zone, 95062306a36Sopenharmony_ci struct list_head *folio_list); 95162306a36Sopenharmony_ci/* The ALLOC_WMARK bits are used as an index to zone->watermark */ 95262306a36Sopenharmony_ci#define ALLOC_WMARK_MIN WMARK_MIN 95362306a36Sopenharmony_ci#define ALLOC_WMARK_LOW WMARK_LOW 95462306a36Sopenharmony_ci#define ALLOC_WMARK_HIGH WMARK_HIGH 95562306a36Sopenharmony_ci#define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci/* Mask to get the watermark bits */ 95862306a36Sopenharmony_ci#define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* 96162306a36Sopenharmony_ci * Only MMU archs have async oom victim reclaim - aka oom_reaper so we 96262306a36Sopenharmony_ci * cannot assume a reduced access to memory reserves is sufficient for 96362306a36Sopenharmony_ci * !MMU 96462306a36Sopenharmony_ci */ 96562306a36Sopenharmony_ci#ifdef CONFIG_MMU 96662306a36Sopenharmony_ci#define ALLOC_OOM 0x08 96762306a36Sopenharmony_ci#else 96862306a36Sopenharmony_ci#define ALLOC_OOM ALLOC_NO_WATERMARKS 96962306a36Sopenharmony_ci#endif 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access 97262306a36Sopenharmony_ci * to 25% of the min watermark or 97362306a36Sopenharmony_ci * 62.5% if __GFP_HIGH is set. 97462306a36Sopenharmony_ci */ 97562306a36Sopenharmony_ci#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% 97662306a36Sopenharmony_ci * of the min watermark. 97762306a36Sopenharmony_ci */ 97862306a36Sopenharmony_ci#define ALLOC_CPUSET 0x40 /* check for correct cpuset */ 97962306a36Sopenharmony_ci#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ 98062306a36Sopenharmony_ci#ifdef CONFIG_ZONE_DMA32 98162306a36Sopenharmony_ci#define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ 98262306a36Sopenharmony_ci#else 98362306a36Sopenharmony_ci#define ALLOC_NOFRAGMENT 0x0 98462306a36Sopenharmony_ci#endif 98562306a36Sopenharmony_ci#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ 98662306a36Sopenharmony_ci#define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_ci/* Flags that allow allocations below the min watermark. */ 98962306a36Sopenharmony_ci#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_cienum ttu_flags; 99262306a36Sopenharmony_cistruct tlbflush_unmap_batch; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci/* 99662306a36Sopenharmony_ci * only for MM internal work items which do not depend on 99762306a36Sopenharmony_ci * any allocations or locks which might depend on allocations 99862306a36Sopenharmony_ci */ 99962306a36Sopenharmony_ciextern struct workqueue_struct *mm_percpu_wq; 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH 100262306a36Sopenharmony_civoid try_to_unmap_flush(void); 100362306a36Sopenharmony_civoid try_to_unmap_flush_dirty(void); 100462306a36Sopenharmony_civoid flush_tlb_batched_pending(struct mm_struct *mm); 100562306a36Sopenharmony_ci#else 100662306a36Sopenharmony_cistatic inline void try_to_unmap_flush(void) 100762306a36Sopenharmony_ci{ 100862306a36Sopenharmony_ci} 100962306a36Sopenharmony_cistatic inline void try_to_unmap_flush_dirty(void) 101062306a36Sopenharmony_ci{ 101162306a36Sopenharmony_ci} 101262306a36Sopenharmony_cistatic inline void flush_tlb_batched_pending(struct mm_struct *mm) 101362306a36Sopenharmony_ci{ 101462306a36Sopenharmony_ci} 101562306a36Sopenharmony_ci#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ciextern const struct trace_print_flags pageflag_names[]; 101862306a36Sopenharmony_ciextern const struct trace_print_flags pagetype_names[]; 101962306a36Sopenharmony_ciextern const struct trace_print_flags vmaflag_names[]; 102062306a36Sopenharmony_ciextern const struct trace_print_flags gfpflag_names[]; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_cistatic inline bool is_migrate_highatomic(enum migratetype migratetype) 102362306a36Sopenharmony_ci{ 102462306a36Sopenharmony_ci return migratetype == MIGRATE_HIGHATOMIC; 102562306a36Sopenharmony_ci} 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_cistatic inline bool is_migrate_highatomic_page(struct page *page) 102862306a36Sopenharmony_ci{ 102962306a36Sopenharmony_ci return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_civoid setup_zone_pageset(struct zone *zone); 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_cistruct migration_target_control { 103562306a36Sopenharmony_ci int nid; /* preferred node id */ 103662306a36Sopenharmony_ci nodemask_t *nmask; 103762306a36Sopenharmony_ci gfp_t gfp_mask; 103862306a36Sopenharmony_ci}; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci/* 104162306a36Sopenharmony_ci * mm/filemap.c 104262306a36Sopenharmony_ci */ 104362306a36Sopenharmony_cisize_t splice_folio_into_pipe(struct pipe_inode_info *pipe, 104462306a36Sopenharmony_ci struct folio *folio, loff_t fpos, size_t size); 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci/* 104762306a36Sopenharmony_ci * mm/vmalloc.c 104862306a36Sopenharmony_ci */ 104962306a36Sopenharmony_ci#ifdef CONFIG_MMU 105062306a36Sopenharmony_civoid __init vmalloc_init(void); 105162306a36Sopenharmony_ciint __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, 105262306a36Sopenharmony_ci pgprot_t prot, struct page **pages, unsigned int page_shift); 105362306a36Sopenharmony_ci#else 105462306a36Sopenharmony_cistatic inline void vmalloc_init(void) 105562306a36Sopenharmony_ci{ 105662306a36Sopenharmony_ci} 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_cistatic inline 105962306a36Sopenharmony_ciint __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, 106062306a36Sopenharmony_ci pgprot_t prot, struct page **pages, unsigned int page_shift) 106162306a36Sopenharmony_ci{ 106262306a36Sopenharmony_ci return -EINVAL; 106362306a36Sopenharmony_ci} 106462306a36Sopenharmony_ci#endif 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ciint __must_check __vmap_pages_range_noflush(unsigned long addr, 106762306a36Sopenharmony_ci unsigned long end, pgprot_t prot, 106862306a36Sopenharmony_ci struct page **pages, unsigned int page_shift); 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_civoid vunmap_range_noflush(unsigned long start, unsigned long end); 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_civoid __vunmap_range_noflush(unsigned long start, unsigned long end); 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ciint numa_migrate_prep(struct page *page, struct vm_area_struct *vma, 107562306a36Sopenharmony_ci unsigned long addr, int page_nid, int *flags); 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_civoid free_zone_device_page(struct page *page); 107862306a36Sopenharmony_ciint migrate_device_coherent_page(struct page *page); 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci/* 108162306a36Sopenharmony_ci * mm/gup.c 108262306a36Sopenharmony_ci */ 108362306a36Sopenharmony_cistruct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); 108462306a36Sopenharmony_ciint __must_check try_grab_page(struct page *page, unsigned int flags); 108562306a36Sopenharmony_ci 108662306a36Sopenharmony_ci/* 108762306a36Sopenharmony_ci * mm/huge_memory.c 108862306a36Sopenharmony_ci */ 108962306a36Sopenharmony_cistruct page *follow_trans_huge_pmd(struct vm_area_struct *vma, 109062306a36Sopenharmony_ci unsigned long addr, pmd_t *pmd, 109162306a36Sopenharmony_ci unsigned int flags); 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_cienum { 109462306a36Sopenharmony_ci /* mark page accessed */ 109562306a36Sopenharmony_ci FOLL_TOUCH = 1 << 16, 109662306a36Sopenharmony_ci /* a retry, previous pass started an IO */ 109762306a36Sopenharmony_ci FOLL_TRIED = 1 << 17, 109862306a36Sopenharmony_ci /* we are working on non-current tsk/mm */ 109962306a36Sopenharmony_ci FOLL_REMOTE = 1 << 18, 110062306a36Sopenharmony_ci /* pages must be released via unpin_user_page */ 110162306a36Sopenharmony_ci FOLL_PIN = 1 << 19, 110262306a36Sopenharmony_ci /* gup_fast: prevent fall-back to slow gup */ 110362306a36Sopenharmony_ci FOLL_FAST_ONLY = 1 << 20, 110462306a36Sopenharmony_ci /* allow unlocking the mmap lock */ 110562306a36Sopenharmony_ci FOLL_UNLOCKABLE = 1 << 21, 110662306a36Sopenharmony_ci}; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci/* 110962306a36Sopenharmony_ci * Indicates for which pages that are write-protected in the page table, 111062306a36Sopenharmony_ci * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the 111162306a36Sopenharmony_ci * GUP pin will remain consistent with the pages mapped into the page tables 111262306a36Sopenharmony_ci * of the MM. 111362306a36Sopenharmony_ci * 111462306a36Sopenharmony_ci * Temporary unmapping of PageAnonExclusive() pages or clearing of 111562306a36Sopenharmony_ci * PageAnonExclusive() has to protect against concurrent GUP: 111662306a36Sopenharmony_ci * * Ordinary GUP: Using the PT lock 111762306a36Sopenharmony_ci * * GUP-fast and fork(): mm->write_protect_seq 111862306a36Sopenharmony_ci * * GUP-fast and KSM or temporary unmapping (swap, migration): see 111962306a36Sopenharmony_ci * page_try_share_anon_rmap() 112062306a36Sopenharmony_ci * 112162306a36Sopenharmony_ci * Must be called with the (sub)page that's actually referenced via the 112262306a36Sopenharmony_ci * page table entry, which might not necessarily be the head page for a 112362306a36Sopenharmony_ci * PTE-mapped THP. 112462306a36Sopenharmony_ci * 112562306a36Sopenharmony_ci * If the vma is NULL, we're coming from the GUP-fast path and might have 112662306a36Sopenharmony_ci * to fallback to the slow path just to lookup the vma. 112762306a36Sopenharmony_ci */ 112862306a36Sopenharmony_cistatic inline bool gup_must_unshare(struct vm_area_struct *vma, 112962306a36Sopenharmony_ci unsigned int flags, struct page *page) 113062306a36Sopenharmony_ci{ 113162306a36Sopenharmony_ci /* 113262306a36Sopenharmony_ci * FOLL_WRITE is implicitly handled correctly as the page table entry 113362306a36Sopenharmony_ci * has to be writable -- and if it references (part of) an anonymous 113462306a36Sopenharmony_ci * folio, that part is required to be marked exclusive. 113562306a36Sopenharmony_ci */ 113662306a36Sopenharmony_ci if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) 113762306a36Sopenharmony_ci return false; 113862306a36Sopenharmony_ci /* 113962306a36Sopenharmony_ci * Note: PageAnon(page) is stable until the page is actually getting 114062306a36Sopenharmony_ci * freed. 114162306a36Sopenharmony_ci */ 114262306a36Sopenharmony_ci if (!PageAnon(page)) { 114362306a36Sopenharmony_ci /* 114462306a36Sopenharmony_ci * We only care about R/O long-term pining: R/O short-term 114562306a36Sopenharmony_ci * pinning does not have the semantics to observe successive 114662306a36Sopenharmony_ci * changes through the process page tables. 114762306a36Sopenharmony_ci */ 114862306a36Sopenharmony_ci if (!(flags & FOLL_LONGTERM)) 114962306a36Sopenharmony_ci return false; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci /* We really need the vma ... */ 115262306a36Sopenharmony_ci if (!vma) 115362306a36Sopenharmony_ci return true; 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci /* 115662306a36Sopenharmony_ci * ... because we only care about writable private ("COW") 115762306a36Sopenharmony_ci * mappings where we have to break COW early. 115862306a36Sopenharmony_ci */ 115962306a36Sopenharmony_ci return is_cow_mapping(vma->vm_flags); 116062306a36Sopenharmony_ci } 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci /* Paired with a memory barrier in page_try_share_anon_rmap(). */ 116362306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) 116462306a36Sopenharmony_ci smp_rmb(); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci /* 116762306a36Sopenharmony_ci * During GUP-fast we might not get called on the head page for a 116862306a36Sopenharmony_ci * hugetlb page that is mapped using cont-PTE, because GUP-fast does 116962306a36Sopenharmony_ci * not work with the abstracted hugetlb PTEs that always point at the 117062306a36Sopenharmony_ci * head page. For hugetlb, PageAnonExclusive only applies on the head 117162306a36Sopenharmony_ci * page (as it cannot be partially COW-shared), so lookup the head page. 117262306a36Sopenharmony_ci */ 117362306a36Sopenharmony_ci if (unlikely(!PageHead(page) && PageHuge(page))) 117462306a36Sopenharmony_ci page = compound_head(page); 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci /* 117762306a36Sopenharmony_ci * Note that PageKsm() pages cannot be exclusive, and consequently, 117862306a36Sopenharmony_ci * cannot get pinned. 117962306a36Sopenharmony_ci */ 118062306a36Sopenharmony_ci return !PageAnonExclusive(page); 118162306a36Sopenharmony_ci} 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ciextern bool mirrored_kernelcore; 118462306a36Sopenharmony_ciextern bool memblock_has_mirror(void); 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_cistatic inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci /* 118962306a36Sopenharmony_ci * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty 119062306a36Sopenharmony_ci * enablements, because when without soft-dirty being compiled in, 119162306a36Sopenharmony_ci * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) 119262306a36Sopenharmony_ci * will be constantly true. 119362306a36Sopenharmony_ci */ 119462306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) 119562306a36Sopenharmony_ci return false; 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci /* 119862306a36Sopenharmony_ci * Soft-dirty is kind of special: its tracking is enabled when the 119962306a36Sopenharmony_ci * vma flags not set. 120062306a36Sopenharmony_ci */ 120162306a36Sopenharmony_ci return !(vma->vm_flags & VM_SOFTDIRTY); 120262306a36Sopenharmony_ci} 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_cistatic inline void vma_iter_config(struct vma_iterator *vmi, 120562306a36Sopenharmony_ci unsigned long index, unsigned long last) 120662306a36Sopenharmony_ci{ 120762306a36Sopenharmony_ci MAS_BUG_ON(&vmi->mas, vmi->mas.node != MAS_START && 120862306a36Sopenharmony_ci (vmi->mas.index > index || vmi->mas.last < index)); 120962306a36Sopenharmony_ci __mas_set_range(&vmi->mas, index, last - 1); 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci/* 121362306a36Sopenharmony_ci * VMA Iterator functions shared between nommu and mmap 121462306a36Sopenharmony_ci */ 121562306a36Sopenharmony_cistatic inline int vma_iter_prealloc(struct vma_iterator *vmi, 121662306a36Sopenharmony_ci struct vm_area_struct *vma) 121762306a36Sopenharmony_ci{ 121862306a36Sopenharmony_ci return mas_preallocate(&vmi->mas, vma, GFP_KERNEL); 121962306a36Sopenharmony_ci} 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_cistatic inline void vma_iter_clear(struct vma_iterator *vmi) 122262306a36Sopenharmony_ci{ 122362306a36Sopenharmony_ci mas_store_prealloc(&vmi->mas, NULL); 122462306a36Sopenharmony_ci} 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_cistatic inline int vma_iter_clear_gfp(struct vma_iterator *vmi, 122762306a36Sopenharmony_ci unsigned long start, unsigned long end, gfp_t gfp) 122862306a36Sopenharmony_ci{ 122962306a36Sopenharmony_ci __mas_set_range(&vmi->mas, start, end - 1); 123062306a36Sopenharmony_ci mas_store_gfp(&vmi->mas, NULL, gfp); 123162306a36Sopenharmony_ci if (unlikely(mas_is_err(&vmi->mas))) 123262306a36Sopenharmony_ci return -ENOMEM; 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci return 0; 123562306a36Sopenharmony_ci} 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_cistatic inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi) 123862306a36Sopenharmony_ci{ 123962306a36Sopenharmony_ci return mas_walk(&vmi->mas); 124062306a36Sopenharmony_ci} 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci/* Store a VMA with preallocated memory */ 124362306a36Sopenharmony_cistatic inline void vma_iter_store(struct vma_iterator *vmi, 124462306a36Sopenharmony_ci struct vm_area_struct *vma) 124562306a36Sopenharmony_ci{ 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci#if defined(CONFIG_DEBUG_VM_MAPLE_TREE) 124862306a36Sopenharmony_ci if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && 124962306a36Sopenharmony_ci vmi->mas.index > vma->vm_start)) { 125062306a36Sopenharmony_ci pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n", 125162306a36Sopenharmony_ci vmi->mas.index, vma->vm_start, vma->vm_start, 125262306a36Sopenharmony_ci vma->vm_end, vmi->mas.index, vmi->mas.last); 125362306a36Sopenharmony_ci } 125462306a36Sopenharmony_ci if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && 125562306a36Sopenharmony_ci vmi->mas.last < vma->vm_start)) { 125662306a36Sopenharmony_ci pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", 125762306a36Sopenharmony_ci vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end, 125862306a36Sopenharmony_ci vmi->mas.index, vmi->mas.last); 125962306a36Sopenharmony_ci } 126062306a36Sopenharmony_ci#endif 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci if (vmi->mas.node != MAS_START && 126362306a36Sopenharmony_ci ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) 126462306a36Sopenharmony_ci vma_iter_invalidate(vmi); 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); 126762306a36Sopenharmony_ci mas_store_prealloc(&vmi->mas, vma); 126862306a36Sopenharmony_ci} 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_cistatic inline int vma_iter_store_gfp(struct vma_iterator *vmi, 127162306a36Sopenharmony_ci struct vm_area_struct *vma, gfp_t gfp) 127262306a36Sopenharmony_ci{ 127362306a36Sopenharmony_ci if (vmi->mas.node != MAS_START && 127462306a36Sopenharmony_ci ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) 127562306a36Sopenharmony_ci vma_iter_invalidate(vmi); 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); 127862306a36Sopenharmony_ci mas_store_gfp(&vmi->mas, vma, gfp); 127962306a36Sopenharmony_ci if (unlikely(mas_is_err(&vmi->mas))) 128062306a36Sopenharmony_ci return -ENOMEM; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci return 0; 128362306a36Sopenharmony_ci} 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci/* 128662306a36Sopenharmony_ci * VMA lock generalization 128762306a36Sopenharmony_ci */ 128862306a36Sopenharmony_cistruct vma_prepare { 128962306a36Sopenharmony_ci struct vm_area_struct *vma; 129062306a36Sopenharmony_ci struct vm_area_struct *adj_next; 129162306a36Sopenharmony_ci struct file *file; 129262306a36Sopenharmony_ci struct address_space *mapping; 129362306a36Sopenharmony_ci struct anon_vma *anon_vma; 129462306a36Sopenharmony_ci struct vm_area_struct *insert; 129562306a36Sopenharmony_ci struct vm_area_struct *remove; 129662306a36Sopenharmony_ci struct vm_area_struct *remove2; 129762306a36Sopenharmony_ci}; 129862306a36Sopenharmony_ci#endif /* __MM_INTERNAL_H */ 1299