18c2ecf20Sopenharmony_ci=======
28c2ecf20Sopenharmony_ciLocking
38c2ecf20Sopenharmony_ci=======
48c2ecf20Sopenharmony_ci
The text below describes the locking rules for VFS-related methods.
It is (believed to be) up-to-date. *Please*, if you change anything in
prototypes or locking protocols - update this file. And update the relevant
instances in the tree, don't leave that to maintainers of filesystems/devices/
etc. At the very least, put the list of dubious cases at the end of this file.
Don't turn it into a log - maintainers of out-of-the-tree code are supposed to
be able to use diff(1).
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ciThing currently missing here: socket operations. Alexey?
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_cidentry_operations
168c2ecf20Sopenharmony_ci=================
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ciprototypes::
198c2ecf20Sopenharmony_ci
	int (*d_revalidate)(struct dentry *, unsigned int);
	int (*d_weak_revalidate)(struct dentry *, unsigned int);
	int (*d_hash)(const struct dentry *, struct qstr *);
	int (*d_compare)(const struct dentry *,
			unsigned int, const char *, const struct qstr *);
	int (*d_delete)(struct dentry *);
	int (*d_init)(struct dentry *);
	void (*d_release)(struct dentry *);
	void (*d_prune)(struct dentry *);
	void (*d_iput)(struct dentry *, struct inode *);
	char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
	struct vfsmount *(*d_automount)(struct path *path);
	int (*d_manage)(const struct path *, bool);
	struct dentry *(*d_real)(struct dentry *, const struct inode *);
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_cilocking rules:
358c2ecf20Sopenharmony_ci
================== ===========	========	==============	========
ops		   rename_lock	->d_lock	may block	rcu-walk
================== ===========	========	==============	========
d_revalidate:	   no		no		yes (ref-walk)	maybe
d_weak_revalidate: no		no		yes		no
d_hash:		   no		no		no		maybe
d_compare:	   yes		no		no		maybe
d_delete:	   no		yes		no		no
d_init:		   no		no		yes		no
d_release:	   no		no		yes		no
d_prune:	   no		yes		no		no
d_iput:		   no		no		yes		no
d_dname:	   no		no		no		no
d_automount:	   no		no		yes		no
d_manage:	   no		no		yes (ref-walk)	maybe
d_real:		   no		no		yes		no
================== ===========	========	==============	========
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ciinode_operations
558c2ecf20Sopenharmony_ci================
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ciprototypes::
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	int (*create) (struct inode *,struct dentry *,umode_t, bool);
608c2ecf20Sopenharmony_ci	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
618c2ecf20Sopenharmony_ci	int (*link) (struct dentry *,struct inode *,struct dentry *);
628c2ecf20Sopenharmony_ci	int (*unlink) (struct inode *,struct dentry *);
638c2ecf20Sopenharmony_ci	int (*symlink) (struct inode *,struct dentry *,const char *);
648c2ecf20Sopenharmony_ci	int (*mkdir) (struct inode *,struct dentry *,umode_t);
658c2ecf20Sopenharmony_ci	int (*rmdir) (struct inode *,struct dentry *);
668c2ecf20Sopenharmony_ci	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
678c2ecf20Sopenharmony_ci	int (*rename) (struct inode *, struct dentry *,
688c2ecf20Sopenharmony_ci			struct inode *, struct dentry *, unsigned int);
698c2ecf20Sopenharmony_ci	int (*readlink) (struct dentry *, char __user *,int);
708c2ecf20Sopenharmony_ci	const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
718c2ecf20Sopenharmony_ci	void (*truncate) (struct inode *);
728c2ecf20Sopenharmony_ci	int (*permission) (struct inode *, int, unsigned int);
738c2ecf20Sopenharmony_ci	int (*get_acl)(struct inode *, int);
748c2ecf20Sopenharmony_ci	int (*setattr) (struct dentry *, struct iattr *);
758c2ecf20Sopenharmony_ci	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
768c2ecf20Sopenharmony_ci	ssize_t (*listxattr) (struct dentry *, char *, size_t);
778c2ecf20Sopenharmony_ci	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
788c2ecf20Sopenharmony_ci	void (*update_time)(struct inode *, struct timespec *, int);
798c2ecf20Sopenharmony_ci	int (*atomic_open)(struct inode *, struct dentry *,
808c2ecf20Sopenharmony_ci				struct file *, unsigned open_flag,
818c2ecf20Sopenharmony_ci				umode_t create_mode);
828c2ecf20Sopenharmony_ci	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_cilocking rules:
858c2ecf20Sopenharmony_ci	all may block
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci============	=============================================
888c2ecf20Sopenharmony_ciops		i_rwsem(inode)
898c2ecf20Sopenharmony_ci============	=============================================
908c2ecf20Sopenharmony_cilookup:		shared
918c2ecf20Sopenharmony_cicreate:		exclusive
928c2ecf20Sopenharmony_cilink:		exclusive (both)
938c2ecf20Sopenharmony_cimknod:		exclusive
948c2ecf20Sopenharmony_cisymlink:	exclusive
958c2ecf20Sopenharmony_cimkdir:		exclusive
968c2ecf20Sopenharmony_ciunlink:		exclusive (both)
rmdir:		exclusive (both) (see below)
988c2ecf20Sopenharmony_cirename:		exclusive (both parents, some children)	(see below)
998c2ecf20Sopenharmony_cireadlink:	no
1008c2ecf20Sopenharmony_ciget_link:	no
1018c2ecf20Sopenharmony_cisetattr:	exclusive
1028c2ecf20Sopenharmony_cipermission:	no (may not block if called in rcu-walk mode)
1038c2ecf20Sopenharmony_ciget_acl:	no
1048c2ecf20Sopenharmony_cigetattr:	no
1058c2ecf20Sopenharmony_cilistxattr:	no
1068c2ecf20Sopenharmony_cifiemap:		no
1078c2ecf20Sopenharmony_ciupdate_time:	no
1088c2ecf20Sopenharmony_ciatomic_open:	shared (exclusive if O_CREAT is set in open flags)
1098c2ecf20Sopenharmony_citmpfile:	no
1108c2ecf20Sopenharmony_ci============	=============================================
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
1148c2ecf20Sopenharmony_ci	exclusive on victim.
1158c2ecf20Sopenharmony_ci	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
1168c2ecf20Sopenharmony_ci	->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories
1178c2ecf20Sopenharmony_ci	involved.
1188c2ecf20Sopenharmony_ci	->rename() has ->i_rwsem exclusive on any subdirectory that changes parent.
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ciSee Documentation/filesystems/directory-locking.rst for more detailed discussion
1218c2ecf20Sopenharmony_ciof the locking scheme for directory operations.
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_cixattr_handler operations
1248c2ecf20Sopenharmony_ci========================
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ciprototypes::
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	bool (*list)(struct dentry *dentry);
1298c2ecf20Sopenharmony_ci	int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
1308c2ecf20Sopenharmony_ci		   struct inode *inode, const char *name, void *buffer,
1318c2ecf20Sopenharmony_ci		   size_t size);
1328c2ecf20Sopenharmony_ci	int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
1338c2ecf20Sopenharmony_ci		   struct inode *inode, const char *name, const void *buffer,
1348c2ecf20Sopenharmony_ci		   size_t size, int flags);
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_cilocking rules:
1378c2ecf20Sopenharmony_ci	all may block
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci=====		==============
1408c2ecf20Sopenharmony_ciops		i_rwsem(inode)
1418c2ecf20Sopenharmony_ci=====		==============
1428c2ecf20Sopenharmony_cilist:		no
1438c2ecf20Sopenharmony_ciget:		no
1448c2ecf20Sopenharmony_ciset:		exclusive
1458c2ecf20Sopenharmony_ci=====		==============
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_cisuper_operations
1488c2ecf20Sopenharmony_ci================
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ciprototypes::
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	struct inode *(*alloc_inode)(struct super_block *sb);
1538c2ecf20Sopenharmony_ci	void (*free_inode)(struct inode *);
1548c2ecf20Sopenharmony_ci	void (*destroy_inode)(struct inode *);
1558c2ecf20Sopenharmony_ci	void (*dirty_inode) (struct inode *, int flags);
1568c2ecf20Sopenharmony_ci	int (*write_inode) (struct inode *, struct writeback_control *wbc);
1578c2ecf20Sopenharmony_ci	int (*drop_inode) (struct inode *);
1588c2ecf20Sopenharmony_ci	void (*evict_inode) (struct inode *);
1598c2ecf20Sopenharmony_ci	void (*put_super) (struct super_block *);
1608c2ecf20Sopenharmony_ci	int (*sync_fs)(struct super_block *sb, int wait);
1618c2ecf20Sopenharmony_ci	int (*freeze_fs) (struct super_block *);
1628c2ecf20Sopenharmony_ci	int (*unfreeze_fs) (struct super_block *);
1638c2ecf20Sopenharmony_ci	int (*statfs) (struct dentry *, struct kstatfs *);
1648c2ecf20Sopenharmony_ci	int (*remount_fs) (struct super_block *, int *, char *);
1658c2ecf20Sopenharmony_ci	void (*umount_begin) (struct super_block *);
1668c2ecf20Sopenharmony_ci	int (*show_options)(struct seq_file *, struct dentry *);
1678c2ecf20Sopenharmony_ci	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1688c2ecf20Sopenharmony_ci	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1698c2ecf20Sopenharmony_ci	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cilocking rules:
1728c2ecf20Sopenharmony_ci	All may block [not true, see below]
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci======================	============	========================
1758c2ecf20Sopenharmony_ciops			s_umount	note
1768c2ecf20Sopenharmony_ci======================	============	========================
1778c2ecf20Sopenharmony_cialloc_inode:
1788c2ecf20Sopenharmony_cifree_inode:				called from RCU callback
1798c2ecf20Sopenharmony_cidestroy_inode:
1808c2ecf20Sopenharmony_cidirty_inode:
1818c2ecf20Sopenharmony_ciwrite_inode:
1828c2ecf20Sopenharmony_cidrop_inode:				!!!inode->i_lock!!!
1838c2ecf20Sopenharmony_cievict_inode:
1848c2ecf20Sopenharmony_ciput_super:		write
1858c2ecf20Sopenharmony_cisync_fs:		read
1868c2ecf20Sopenharmony_cifreeze_fs:		write
1878c2ecf20Sopenharmony_ciunfreeze_fs:		write
1888c2ecf20Sopenharmony_cistatfs:			maybe(read)	(see below)
1898c2ecf20Sopenharmony_ciremount_fs:		write
1908c2ecf20Sopenharmony_ciumount_begin:		no
1918c2ecf20Sopenharmony_cishow_options:		no		(namespace_sem)
1928c2ecf20Sopenharmony_ciquota_read:		no		(see below)
1938c2ecf20Sopenharmony_ciquota_write:		no		(see below)
1948c2ecf20Sopenharmony_cibdev_try_to_free_page:	no		(see below)
1958c2ecf20Sopenharmony_ci======================	============	========================
1968c2ecf20Sopenharmony_ci
->statfs() has s_umount (shared) when called by ustat(2) (native or
compat), but that's an accident of bad API; s_umount is used to pin
the superblock down when we only have the dev_t given to us by userland to
identify the superblock.  Everything else (statfs(), fstatfs(), etc.)
doesn't hold it when calling ->statfs() - the superblock is pinned down
by resolving the pathname passed to the syscall.
2038c2ecf20Sopenharmony_ci
->quota_read() and ->quota_write() functions are both guaranteed to
be the only ones operating on the quota file by the quota code (via
dqio_sem) (unless an admin really wants to screw up something and
writes to quota files with quotas on). For other details about locking
see also the dquot_operations section.
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci->bdev_try_to_free_page is called from the ->releasepage handler of
2118c2ecf20Sopenharmony_cithe block device inode.  See there for more details.
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cifile_system_type
2148c2ecf20Sopenharmony_ci================
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ciprototypes::
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	struct dentry *(*mount) (struct file_system_type *, int,
2198c2ecf20Sopenharmony_ci		       const char *, void *);
2208c2ecf20Sopenharmony_ci	void (*kill_sb) (struct super_block *);
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cilocking rules:
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci=======		=========
2258c2ecf20Sopenharmony_ciops		may block
2268c2ecf20Sopenharmony_ci=======		=========
2278c2ecf20Sopenharmony_cimount		yes
2288c2ecf20Sopenharmony_cikill_sb		yes
2298c2ecf20Sopenharmony_ci=======		=========
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci->mount() returns ERR_PTR or the root dentry; its superblock should be locked
2328c2ecf20Sopenharmony_cion return.
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci->kill_sb() takes a write-locked superblock, does all shutdown work on it,
2358c2ecf20Sopenharmony_ciunlocks and drops the reference.
2368c2ecf20Sopenharmony_ci
address_space_operations
========================

prototypes::
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	int (*writepage)(struct page *page, struct writeback_control *wbc);
2428c2ecf20Sopenharmony_ci	int (*readpage)(struct file *, struct page *);
2438c2ecf20Sopenharmony_ci	int (*writepages)(struct address_space *, struct writeback_control *);
2448c2ecf20Sopenharmony_ci	int (*set_page_dirty)(struct page *page);
2458c2ecf20Sopenharmony_ci	void (*readahead)(struct readahead_control *);
2468c2ecf20Sopenharmony_ci	int (*readpages)(struct file *filp, struct address_space *mapping,
2478c2ecf20Sopenharmony_ci			struct list_head *pages, unsigned nr_pages);
2488c2ecf20Sopenharmony_ci	int (*write_begin)(struct file *, struct address_space *mapping,
2498c2ecf20Sopenharmony_ci				loff_t pos, unsigned len, unsigned flags,
2508c2ecf20Sopenharmony_ci				struct page **pagep, void **fsdata);
2518c2ecf20Sopenharmony_ci	int (*write_end)(struct file *, struct address_space *mapping,
2528c2ecf20Sopenharmony_ci				loff_t pos, unsigned len, unsigned copied,
2538c2ecf20Sopenharmony_ci				struct page *page, void *fsdata);
2548c2ecf20Sopenharmony_ci	sector_t (*bmap)(struct address_space *, sector_t);
2558c2ecf20Sopenharmony_ci	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
2568c2ecf20Sopenharmony_ci	int (*releasepage) (struct page *, int);
2578c2ecf20Sopenharmony_ci	void (*freepage)(struct page *);
2588c2ecf20Sopenharmony_ci	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
2598c2ecf20Sopenharmony_ci	bool (*isolate_page) (struct page *, isolate_mode_t);
2608c2ecf20Sopenharmony_ci	int (*migratepage)(struct address_space *, struct page *, struct page *);
2618c2ecf20Sopenharmony_ci	void (*putback_page) (struct page *);
2628c2ecf20Sopenharmony_ci	int (*launder_page)(struct page *);
2638c2ecf20Sopenharmony_ci	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
2648c2ecf20Sopenharmony_ci	int (*error_remove_page)(struct address_space *, struct page *);
2658c2ecf20Sopenharmony_ci	int (*swap_activate)(struct file *);
2668c2ecf20Sopenharmony_ci	int (*swap_deactivate)(struct file *);
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_cilocking rules:
2698c2ecf20Sopenharmony_ci	All except set_page_dirty and freepage may block
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci======================	======================== =========
2728c2ecf20Sopenharmony_ciops			PageLocked(page)	 i_rwsem
2738c2ecf20Sopenharmony_ci======================	======================== =========
2748c2ecf20Sopenharmony_ciwritepage:		yes, unlocks (see below)
2758c2ecf20Sopenharmony_cireadpage:		yes, unlocks
2768c2ecf20Sopenharmony_ciwritepages:
set_page_dirty:		no
2788c2ecf20Sopenharmony_cireadahead:		yes, unlocks
2798c2ecf20Sopenharmony_cireadpages:		no
2808c2ecf20Sopenharmony_ciwrite_begin:		locks the page		 exclusive
2818c2ecf20Sopenharmony_ciwrite_end:		yes, unlocks		 exclusive
2828c2ecf20Sopenharmony_cibmap:
2838c2ecf20Sopenharmony_ciinvalidatepage:		yes
2848c2ecf20Sopenharmony_cireleasepage:		yes
2858c2ecf20Sopenharmony_cifreepage:		yes
2868c2ecf20Sopenharmony_cidirect_IO:
2878c2ecf20Sopenharmony_ciisolate_page:		yes
2888c2ecf20Sopenharmony_cimigratepage:		yes (both)
2898c2ecf20Sopenharmony_ciputback_page:		yes
2908c2ecf20Sopenharmony_cilaunder_page:		yes
2918c2ecf20Sopenharmony_ciis_partially_uptodate:	yes
2928c2ecf20Sopenharmony_cierror_remove_page:	yes
2938c2ecf20Sopenharmony_ciswap_activate:		no
2948c2ecf20Sopenharmony_ciswap_deactivate:	no
2958c2ecf20Sopenharmony_ci======================	======================== =========
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci->write_begin(), ->write_end() and ->readpage() may be called from
2988c2ecf20Sopenharmony_cithe request handler (/dev/loop).
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci->readpage() unlocks the page, either synchronously or via I/O
3018c2ecf20Sopenharmony_cicompletion.
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci->readahead() unlocks the pages that I/O is attempted on like ->readpage().
3048c2ecf20Sopenharmony_ci
->readpages() populates the pagecache with the passed pages and starts
I/O against them.  They are unlocked upon I/O completion.
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci->writepage() is used for two purposes: for "memory cleansing" and for
3098c2ecf20Sopenharmony_ci"sync".  These are quite different operations and the behaviour may differ
3108c2ecf20Sopenharmony_cidepending upon the mode.
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ciIf writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
3138c2ecf20Sopenharmony_ciit *must* start I/O against the page, even if that would involve
3148c2ecf20Sopenharmony_ciblocking on in-progress I/O.
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ciIf writepage is called for memory cleansing (sync_mode ==
3178c2ecf20Sopenharmony_ciWBC_SYNC_NONE) then its role is to get as much writeout underway as
3188c2ecf20Sopenharmony_cipossible.  So writepage should try to avoid blocking against
3198c2ecf20Sopenharmony_cicurrently-in-progress I/O.
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ciIf the filesystem is not called for "sync" and it determines that it
3228c2ecf20Sopenharmony_ciwould need to block against in-progress I/O to be able to start new I/O
3238c2ecf20Sopenharmony_ciagainst the page the filesystem should redirty the page with
3248c2ecf20Sopenharmony_ciredirty_page_for_writepage(), then unlock the page and return zero.
3258c2ecf20Sopenharmony_ciThis may also be done to avoid internal deadlocks, but rarely.
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ciIf the filesystem is called for sync then it must wait on any
3288c2ecf20Sopenharmony_ciin-progress I/O and then start new I/O.
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ciThe filesystem should unlock the page synchronously, before returning to the
3318c2ecf20Sopenharmony_cicaller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
3328c2ecf20Sopenharmony_civalue. WRITEPAGE_ACTIVATE means that page cannot really be written out
3338c2ecf20Sopenharmony_cicurrently, and VM should stop calling ->writepage() on this page for some
3348c2ecf20Sopenharmony_citime. VM does this by moving page to the head of the active list, hence the
3358c2ecf20Sopenharmony_ciname.
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ciUnless the filesystem is going to redirty_page_for_writepage(), unlock the page
3388c2ecf20Sopenharmony_ciand return zero, writepage *must* run set_page_writeback() against the page,
3398c2ecf20Sopenharmony_cifollowed by unlocking it.  Once set_page_writeback() has been run against the
3408c2ecf20Sopenharmony_cipage, write I/O can be submitted and the write I/O completion handler must run
3418c2ecf20Sopenharmony_ciend_page_writeback() once the I/O is complete.  If no I/O is submitted, the
3428c2ecf20Sopenharmony_cifilesystem must run end_page_writeback() against the page before returning from
3438c2ecf20Sopenharmony_ciwritepage.
3448c2ecf20Sopenharmony_ci
That is: after 2.5.12, pages which are under writeout are *not* locked.  Note,
if the filesystem needs the page to be locked during writeout, that is ok, too:
the page is allowed to be unlocked at any point in time between the calls to
set_page_writeback() and end_page_writeback().
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ciNote, failure to run either redirty_page_for_writepage() or the combination of
3518c2ecf20Sopenharmony_ciset_page_writeback()/end_page_writeback() on a page submitted to writepage
3528c2ecf20Sopenharmony_ciwill leave the page itself marked clean but it will be tagged as dirty in the
3538c2ecf20Sopenharmony_ciradix tree.  This incoherency can lead to all sorts of hard-to-debug problems
3548c2ecf20Sopenharmony_ciin the filesystem like having dirty inodes at umount and losing written data.
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci->writepages() is used for periodic writeback and for syscall-initiated
3578c2ecf20Sopenharmony_cisync operations.  The address_space should start I/O against at least
3588c2ecf20Sopenharmony_ci``*nr_to_write`` pages.  ``*nr_to_write`` must be decremented for each page
3598c2ecf20Sopenharmony_ciwhich is written.  The address_space implementation may write more (or less)
3608c2ecf20Sopenharmony_cipages than ``*nr_to_write`` asks for, but it should try to be reasonably close.
3618c2ecf20Sopenharmony_ciIf nr_to_write is NULL, all dirty pages must be written.
3628c2ecf20Sopenharmony_ci
writepages should *only* write pages which are present on
mapping->io_pages.
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci->set_page_dirty() is called from various places in the kernel
3678c2ecf20Sopenharmony_ciwhen the target page is marked as needing writeback.  It may be called
3688c2ecf20Sopenharmony_ciunder spinlock (it cannot block) and is sometimes called with the page
3698c2ecf20Sopenharmony_cinot locked.
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
3728c2ecf20Sopenharmony_cifilesystems and by the swapper. The latter will eventually go away.  Please,
3738c2ecf20Sopenharmony_cikeep it that way and don't breed new callers.
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci->invalidatepage() is called when the filesystem must attempt to drop
3768c2ecf20Sopenharmony_cisome or all of the buffers from the page when it is being truncated. It
3778c2ecf20Sopenharmony_cireturns zero on success. If ->invalidatepage is zero, the kernel uses
3788c2ecf20Sopenharmony_ciblock_invalidatepage() instead.
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci->releasepage() is called when the kernel is about to try to drop the
3818c2ecf20Sopenharmony_cibuffers from the page in preparation for freeing it.  It returns zero to
3828c2ecf20Sopenharmony_ciindicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
3838c2ecf20Sopenharmony_cithe kernel assumes that the fs has no private interest in the buffers.
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci->freepage() is called when the kernel is done dropping the page
3868c2ecf20Sopenharmony_cifrom the page cache.
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci->launder_page() may be called prior to releasing a page if
3898c2ecf20Sopenharmony_ciit is still found to be dirty. It returns zero if the page was successfully
3908c2ecf20Sopenharmony_cicleaned, or an error value if not. Note that in order to prevent the page
3918c2ecf20Sopenharmony_cigetting mapped back in and redirtied, it needs to be kept locked
3928c2ecf20Sopenharmony_ciacross the entire operation.
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci->swap_activate will be called with a non-zero argument on
3958c2ecf20Sopenharmony_cifiles backing (non block device backed) swapfiles. A return value
3968c2ecf20Sopenharmony_ciof zero indicates success, in which case this file can be used for
3978c2ecf20Sopenharmony_cibacking swapspace. The swapspace operations will be proxied to the
3988c2ecf20Sopenharmony_ciaddress space operations.
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci->swap_deactivate() will be called in the sys_swapoff()
4018c2ecf20Sopenharmony_cipath after ->swap_activate() returned success.
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_cifile_lock_operations
4048c2ecf20Sopenharmony_ci====================
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ciprototypes::
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
4098c2ecf20Sopenharmony_ci	void (*fl_release_private)(struct file_lock *);
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_cilocking rules:
4138c2ecf20Sopenharmony_ci
===================	=============	==========
ops			inode->i_lock	may block
===================	=============	==========
fl_copy_lock:		yes		no
fl_release_private:	maybe		maybe [1]_
===================	=============	==========

.. [1]
   ->fl_release_private for flock or POSIX locks is currently allowed
   to block. Leases however can still be freed while the i_lock is held and
   so fl_release_private called on a lease should not block.
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_cilock_manager_operations
4278c2ecf20Sopenharmony_ci=======================
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ciprototypes::
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	void (*lm_notify)(struct file_lock *);  /* unblock callback */
4328c2ecf20Sopenharmony_ci	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
4338c2ecf20Sopenharmony_ci	void (*lm_break)(struct file_lock *); /* break_lease callback */
4348c2ecf20Sopenharmony_ci	int (*lm_change)(struct file_lock **, int);
4358c2ecf20Sopenharmony_ci	bool (*lm_breaker_owns_lease)(struct file_lock *);
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_cilocking rules:
4388c2ecf20Sopenharmony_ci
======================	=============	=================	=========
ops			inode->i_lock	blocked_lock_lock	may block
======================	=============	=================	=========
lm_notify:		yes		yes			no
lm_grant:		no		no			no
lm_break:		yes		no			no
lm_change:		yes		no			no
lm_breaker_owns_lease:	no		no			no
======================	=============	=================	=========
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_cibuffer_head
4508c2ecf20Sopenharmony_ci===========
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ciprototypes::
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	void (*b_end_io)(struct buffer_head *bh, int uptodate);
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_cilocking rules:
4578c2ecf20Sopenharmony_ci
Called from interrupts. In other words, extreme care is needed here.
bh is locked, but that is the only guarantee we have here. Currently only RAID1,
highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
call this method upon I/O completion.
4628c2ecf20Sopenharmony_ci
block_device_operations
=======================

prototypes::
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	int (*open) (struct block_device *, fmode_t);
4688c2ecf20Sopenharmony_ci	int (*release) (struct gendisk *, fmode_t);
4698c2ecf20Sopenharmony_ci	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
4708c2ecf20Sopenharmony_ci	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
4718c2ecf20Sopenharmony_ci	int (*direct_access) (struct block_device *, sector_t, void **,
4728c2ecf20Sopenharmony_ci				unsigned long *);
4738c2ecf20Sopenharmony_ci	void (*unlock_native_capacity) (struct gendisk *);
4748c2ecf20Sopenharmony_ci	int (*revalidate_disk) (struct gendisk *);
4758c2ecf20Sopenharmony_ci	int (*getgeo)(struct block_device *, struct hd_geometry *);
4768c2ecf20Sopenharmony_ci	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_cilocking rules:
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci======================= ===================
4818c2ecf20Sopenharmony_ciops			bd_mutex
4828c2ecf20Sopenharmony_ci======================= ===================
4838c2ecf20Sopenharmony_ciopen:			yes
4848c2ecf20Sopenharmony_cirelease:		yes
4858c2ecf20Sopenharmony_ciioctl:			no
4868c2ecf20Sopenharmony_cicompat_ioctl:		no
4878c2ecf20Sopenharmony_cidirect_access:		no
4888c2ecf20Sopenharmony_ciunlock_native_capacity:	no
4898c2ecf20Sopenharmony_cirevalidate_disk:	no
4908c2ecf20Sopenharmony_cigetgeo:			no
4918c2ecf20Sopenharmony_ciswap_slot_free_notify:	no	(see below)
4928c2ecf20Sopenharmony_ci======================= ===================
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ciswap_slot_free_notify is called with swap_lock and sometimes the page lock
4958c2ecf20Sopenharmony_ciheld.
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_cifile_operations
4998c2ecf20Sopenharmony_ci===============
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ciprototypes::
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	loff_t (*llseek) (struct file *, loff_t, int);
5048c2ecf20Sopenharmony_ci	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
5058c2ecf20Sopenharmony_ci	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
5068c2ecf20Sopenharmony_ci	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
5078c2ecf20Sopenharmony_ci	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
5088c2ecf20Sopenharmony_ci	int (*iterate) (struct file *, struct dir_context *);
5098c2ecf20Sopenharmony_ci	int (*iterate_shared) (struct file *, struct dir_context *);
5108c2ecf20Sopenharmony_ci	__poll_t (*poll) (struct file *, struct poll_table_struct *);
5118c2ecf20Sopenharmony_ci	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
5128c2ecf20Sopenharmony_ci	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
5138c2ecf20Sopenharmony_ci	int (*mmap) (struct file *, struct vm_area_struct *);
5148c2ecf20Sopenharmony_ci	int (*open) (struct inode *, struct file *);
5158c2ecf20Sopenharmony_ci	int (*flush) (struct file *);
5168c2ecf20Sopenharmony_ci	int (*release) (struct inode *, struct file *);
5178c2ecf20Sopenharmony_ci	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
5188c2ecf20Sopenharmony_ci	int (*fasync) (int, struct file *, int);
5198c2ecf20Sopenharmony_ci	int (*lock) (struct file *, int, struct file_lock *);
5208c2ecf20Sopenharmony_ci	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
5218c2ecf20Sopenharmony_ci			loff_t *);
5228c2ecf20Sopenharmony_ci	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
5238c2ecf20Sopenharmony_ci			loff_t *);
5248c2ecf20Sopenharmony_ci	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
5258c2ecf20Sopenharmony_ci			void __user *);
5268c2ecf20Sopenharmony_ci	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
5278c2ecf20Sopenharmony_ci			loff_t *, int);
5288c2ecf20Sopenharmony_ci	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
5298c2ecf20Sopenharmony_ci			unsigned long, unsigned long, unsigned long);
5308c2ecf20Sopenharmony_ci	int (*check_flags)(int);
5318c2ecf20Sopenharmony_ci	int (*flock) (struct file *, int, struct file_lock *);
5328c2ecf20Sopenharmony_ci	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
5338c2ecf20Sopenharmony_ci			size_t, unsigned int);
5348c2ecf20Sopenharmony_ci	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
5358c2ecf20Sopenharmony_ci			size_t, unsigned int);
5368c2ecf20Sopenharmony_ci	int (*setlease)(struct file *, long, struct file_lock **, void **);
5378c2ecf20Sopenharmony_ci	long (*fallocate)(struct file *, int, loff_t, loff_t);
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_cilocking rules:
5408c2ecf20Sopenharmony_ci	All may block.
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci->llseek() locking has moved from llseek to the individual llseek
5438c2ecf20Sopenharmony_ciimplementations.  If your fs is not using generic_file_llseek, you
5448c2ecf20Sopenharmony_cineed to acquire and release the appropriate locks in your ->llseek().
5458c2ecf20Sopenharmony_ciFor many filesystems, it is probably safe to acquire the inode
5468c2ecf20Sopenharmony_cimutex or just to use i_size_read() instead.
5478c2ecf20Sopenharmony_ciNote: this does not protect file->f_pos against concurrent modifications,
5488c2ecf20Sopenharmony_cisince that is something userspace has to take care of.
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ci->iterate() is called with i_rwsem exclusive.
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci->iterate_shared() is called with i_rwsem at least shared.
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
5558c2ecf20Sopenharmony_ciMost instances call fasync_helper(), which does that maintenance, so it's
5568c2ecf20Sopenharmony_cinot normally something one needs to worry about.  Return values > 0 will be
5578c2ecf20Sopenharmony_cimapped to zero in the VFS layer.
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci->readdir() and ->ioctl() on directories must be changed. Ideally we would
5608c2ecf20Sopenharmony_cimove ->readdir() to inode_operations and use a separate method for directory
5618c2ecf20Sopenharmony_ci->ioctl() or kill the latter completely. One of the problems is that for
5628c2ecf20Sopenharmony_cianything that resembles union-mount we won't have a struct file for all
5638c2ecf20Sopenharmony_cicomponents. And there are other reasons why the current interface is a mess...
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci->read on directories probably must go away - we should just enforce -EISDIR
5668c2ecf20Sopenharmony_ciin sys_read() and friends.
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci->setlease operations should call generic_setlease() before or after setting
5698c2ecf20Sopenharmony_cithe lease within the individual filesystem to record the result of the
5708c2ecf20Sopenharmony_cioperation.
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_cidquot_operations
5738c2ecf20Sopenharmony_ci================
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ciprototypes::
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	int (*write_dquot) (struct dquot *);
5788c2ecf20Sopenharmony_ci	int (*acquire_dquot) (struct dquot *);
5798c2ecf20Sopenharmony_ci	int (*release_dquot) (struct dquot *);
5808c2ecf20Sopenharmony_ci	int (*mark_dirty) (struct dquot *);
5818c2ecf20Sopenharmony_ci	int (*write_info) (struct super_block *, int);
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ciThese operations are intended to be more or less wrapping functions that ensure
5848c2ecf20Sopenharmony_cia proper locking wrt the filesystem and call the generic quota operations.
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ciWhat a filesystem should expect from the generic quota functions:
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci==============	============	=========================
5898c2ecf20Sopenharmony_ciops		FS recursion	Held locks when called
5908c2ecf20Sopenharmony_ci==============	============	=========================
5918c2ecf20Sopenharmony_ciwrite_dquot:	yes		dqonoff_sem or dqptr_sem
5928c2ecf20Sopenharmony_ciacquire_dquot:	yes		dqonoff_sem or dqptr_sem
5938c2ecf20Sopenharmony_cirelease_dquot:	yes		dqonoff_sem or dqptr_sem
5948c2ecf20Sopenharmony_cimark_dirty:	no		-
5958c2ecf20Sopenharmony_ciwrite_info:	yes		dqonoff_sem
5968c2ecf20Sopenharmony_ci==============	============	=========================
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ciFS recursion means calling ->quota_read() and ->quota_write() from superblock
5998c2ecf20Sopenharmony_cioperations.
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_ciMore details about quota locking can be found in fs/quota/dquot.c.
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_civm_operations_struct
6048c2ecf20Sopenharmony_ci====================
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ciprototypes::
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	void (*open)(struct vm_area_struct*);
6098c2ecf20Sopenharmony_ci	void (*close)(struct vm_area_struct*);
6108c2ecf20Sopenharmony_ci	vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
6118c2ecf20Sopenharmony_ci	vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
6128c2ecf20Sopenharmony_ci	vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
6138c2ecf20Sopenharmony_ci	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_cilocking rules:
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci=============	=========	===========================
6188c2ecf20Sopenharmony_ciops		mmap_lock	PageLocked(page)
6198c2ecf20Sopenharmony_ci=============	=========	===========================
6208c2ecf20Sopenharmony_ciopen:		yes
6218c2ecf20Sopenharmony_ciclose:		yes
6228c2ecf20Sopenharmony_cifault:		yes		can return with page locked
6238c2ecf20Sopenharmony_cimap_pages:	yes
6248c2ecf20Sopenharmony_cipage_mkwrite:	yes		can return with page locked
6258c2ecf20Sopenharmony_cipfn_mkwrite:	yes
6268c2ecf20Sopenharmony_ciaccess:		yes
6278c2ecf20Sopenharmony_ci=============	=========	===========================
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci->fault() is called when a previously not present pte is about
6308c2ecf20Sopenharmony_cito be faulted in. The filesystem must find and return the page associated
6318c2ecf20Sopenharmony_ciwith the passed in "pgoff" in the vm_fault structure. If it is possible that
6328c2ecf20Sopenharmony_cithe page may be truncated and/or invalidated, then the filesystem must lock
6338c2ecf20Sopenharmony_cithe page, then ensure it is not already truncated (the page lock will block
6348c2ecf20Sopenharmony_cisubsequent truncate), and then return with VM_FAULT_LOCKED, and the page
6358c2ecf20Sopenharmony_cilocked. The VM will unlock the page.
6368c2ecf20Sopenharmony_ci
6378c2ecf20Sopenharmony_ci->map_pages() is called when the VM asks to map easily accessible pages.
6388c2ecf20Sopenharmony_ciThe filesystem should find and map pages associated with offsets from
6398c2ecf20Sopenharmony_ci"start_pgoff" till "end_pgoff". ->map_pages() is called with the page table
6408c2ecf20Sopenharmony_cilocked and must not block.  If it's not possible to reach a page without
6418c2ecf20Sopenharmony_ciblocking, the filesystem should skip it. The filesystem should use do_set_pte()
6428c2ecf20Sopenharmony_cito set up the page table entry. A pointer to the entry associated with the page
6438c2ecf20Sopenharmony_ciis passed in the "pte" field of the vm_fault structure. Pointers to entries for
6448c2ecf20Sopenharmony_ciother offsets should be calculated relative to "pte".
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci->page_mkwrite() is called when a previously read-only pte is
6478c2ecf20Sopenharmony_ciabout to become writeable. The filesystem again must ensure that there are
6488c2ecf20Sopenharmony_cino truncate/invalidate races, and then return with the page locked. If
6498c2ecf20Sopenharmony_cithe page has been truncated, the filesystem should not look up a new page
6508c2ecf20Sopenharmony_cilike the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
6518c2ecf20Sopenharmony_ciwill cause the VM to retry the fault.
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci->pfn_mkwrite() is the same as page_mkwrite but when the pte is
6548c2ecf20Sopenharmony_ciVM_PFNMAP or VM_MIXEDMAP with a page-less entry. The expected return is
6558c2ecf20Sopenharmony_ciVM_FAULT_NOPAGE or one of the VM_FAULT_ERROR types. The default behavior
6568c2ecf20Sopenharmony_ciafter this call is to make the pte read-write, unless pfn_mkwrite returns
6578c2ecf20Sopenharmony_cian error.
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci->access() is called when get_user_pages() fails in
6608c2ecf20Sopenharmony_ciaccess_process_vm(), typically used to debug a process through
6618c2ecf20Sopenharmony_ci/proc/pid/mem or ptrace.  This function is needed only for
6628c2ecf20Sopenharmony_ciVM_IO | VM_PFNMAP VMAs.
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci--------------------------------------------------------------------------------
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci			Dubious stuff
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci(if you break something or notice that it is broken and do not fix it yourself
6698c2ecf20Sopenharmony_ci- at least put it here)
670