18c2ecf20Sopenharmony_ci======= 28c2ecf20Sopenharmony_ciLocking 38c2ecf20Sopenharmony_ci======= 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ciThe text below describes the locking rules for VFS-related methods. 68c2ecf20Sopenharmony_ciIt is (believed to be) up-to-date. *Please*, if you change anything in 78c2ecf20Sopenharmony_ciprototypes or locking protocols - update this file. And update the relevant 88c2ecf20Sopenharmony_ciinstances in the tree, don't leave that to maintainers of filesystems/devices/ 98c2ecf20Sopenharmony_cietc. At the very least, put the list of dubious cases at the end of this file. 108c2ecf20Sopenharmony_ciDon't turn it into a log - maintainers of out-of-the-tree code are supposed to 118c2ecf20Sopenharmony_cibe able to use diff(1). 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ciThing currently missing here: socket operations. Alexey? 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_cidentry_operations 168c2ecf20Sopenharmony_ci================= 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ciprototypes:: 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci int (*d_revalidate)(struct dentry *, unsigned int); 218c2ecf20Sopenharmony_ci int (*d_weak_revalidate)(struct dentry *, unsigned int); 228c2ecf20Sopenharmony_ci int (*d_hash)(const struct dentry *, struct qstr *); 238c2ecf20Sopenharmony_ci int (*d_compare)(const struct dentry *, 248c2ecf20Sopenharmony_ci unsigned int, const char *, const struct qstr *); 258c2ecf20Sopenharmony_ci int (*d_delete)(struct dentry *); 268c2ecf20Sopenharmony_ci int (*d_init)(struct dentry *); 278c2ecf20Sopenharmony_ci void (*d_release)(struct dentry *); 288c2ecf20Sopenharmony_ci void (*d_iput)(struct dentry *, struct inode *); 298c2ecf20Sopenharmony_ci char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); 308c2ecf20Sopenharmony_ci struct vfsmount *(*d_automount)(struct path *path); 318c2ecf20Sopenharmony_ci int (*d_manage)(const struct path *, bool); 328c2ecf20Sopenharmony_ci struct dentry 
*(*d_real)(struct dentry *, const struct inode *); 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_cilocking rules: 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci================== =========== ======== ============== ======== 378c2ecf20Sopenharmony_ciops rename_lock ->d_lock may block rcu-walk 388c2ecf20Sopenharmony_ci================== =========== ======== ============== ======== 398c2ecf20Sopenharmony_cid_revalidate: no no yes (ref-walk) maybe 408c2ecf20Sopenharmony_cid_weak_revalidate: no no yes no 418c2ecf20Sopenharmony_cid_hash no no no maybe 428c2ecf20Sopenharmony_cid_compare: yes no no maybe 438c2ecf20Sopenharmony_cid_delete: no yes no no 448c2ecf20Sopenharmony_cid_init: no no yes no 458c2ecf20Sopenharmony_cid_release: no no yes no 468c2ecf20Sopenharmony_cid_prune: no yes no no 478c2ecf20Sopenharmony_cid_iput: no no yes no 488c2ecf20Sopenharmony_cid_dname: no no no no 498c2ecf20Sopenharmony_cid_automount: no no yes no 508c2ecf20Sopenharmony_cid_manage: no no yes (ref-walk) maybe 518c2ecf20Sopenharmony_cid_real no no yes no 528c2ecf20Sopenharmony_ci================== =========== ======== ============== ======== 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ciinode_operations 558c2ecf20Sopenharmony_ci================ 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ciprototypes:: 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci int (*create) (struct inode *,struct dentry *,umode_t, bool); 608c2ecf20Sopenharmony_ci struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); 618c2ecf20Sopenharmony_ci int (*link) (struct dentry *,struct inode *,struct dentry *); 628c2ecf20Sopenharmony_ci int (*unlink) (struct inode *,struct dentry *); 638c2ecf20Sopenharmony_ci int (*symlink) (struct inode *,struct dentry *,const char *); 648c2ecf20Sopenharmony_ci int (*mkdir) (struct inode *,struct dentry *,umode_t); 658c2ecf20Sopenharmony_ci int (*rmdir) (struct inode *,struct dentry *); 668c2ecf20Sopenharmony_ci int (*mknod) (struct inode *,struct 
dentry *,umode_t,dev_t); 678c2ecf20Sopenharmony_ci int (*rename) (struct inode *, struct dentry *, 688c2ecf20Sopenharmony_ci struct inode *, struct dentry *, unsigned int); 698c2ecf20Sopenharmony_ci int (*readlink) (struct dentry *, char __user *,int); 708c2ecf20Sopenharmony_ci const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *); 718c2ecf20Sopenharmony_ci void (*truncate) (struct inode *); 728c2ecf20Sopenharmony_ci int (*permission) (struct inode *, int, unsigned int); 738c2ecf20Sopenharmony_ci int (*get_acl)(struct inode *, int); 748c2ecf20Sopenharmony_ci int (*setattr) (struct dentry *, struct iattr *); 758c2ecf20Sopenharmony_ci int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); 768c2ecf20Sopenharmony_ci ssize_t (*listxattr) (struct dentry *, char *, size_t); 778c2ecf20Sopenharmony_ci int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); 788c2ecf20Sopenharmony_ci void (*update_time)(struct inode *, struct timespec *, int); 798c2ecf20Sopenharmony_ci int (*atomic_open)(struct inode *, struct dentry *, 808c2ecf20Sopenharmony_ci struct file *, unsigned open_flag, 818c2ecf20Sopenharmony_ci umode_t create_mode); 828c2ecf20Sopenharmony_ci int (*tmpfile) (struct inode *, struct dentry *, umode_t); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_cilocking rules: 858c2ecf20Sopenharmony_ci all may block 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci============ ============================================= 888c2ecf20Sopenharmony_ciops i_rwsem(inode) 898c2ecf20Sopenharmony_ci============ ============================================= 908c2ecf20Sopenharmony_cilookup: shared 918c2ecf20Sopenharmony_cicreate: exclusive 928c2ecf20Sopenharmony_cilink: exclusive (both) 938c2ecf20Sopenharmony_cimknod: exclusive 948c2ecf20Sopenharmony_cisymlink: exclusive 958c2ecf20Sopenharmony_cimkdir: exclusive 968c2ecf20Sopenharmony_ciunlink: exclusive (both) 978c2ecf20Sopenharmony_cirmdir: exclusive (both)(see 
below) 988c2ecf20Sopenharmony_cirename: exclusive (both parents, some children) (see below) 998c2ecf20Sopenharmony_cireadlink: no 1008c2ecf20Sopenharmony_ciget_link: no 1018c2ecf20Sopenharmony_cisetattr: exclusive 1028c2ecf20Sopenharmony_cipermission: no (may not block if called in rcu-walk mode) 1038c2ecf20Sopenharmony_ciget_acl: no 1048c2ecf20Sopenharmony_cigetattr: no 1058c2ecf20Sopenharmony_cilistxattr: no 1068c2ecf20Sopenharmony_cifiemap: no 1078c2ecf20Sopenharmony_ciupdate_time: no 1088c2ecf20Sopenharmony_ciatomic_open: shared (exclusive if O_CREAT is set in open flags) 1098c2ecf20Sopenharmony_citmpfile: no 1108c2ecf20Sopenharmony_ci============ ============================================= 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem 1148c2ecf20Sopenharmony_ci exclusive on victim. 1158c2ecf20Sopenharmony_ci cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. 1168c2ecf20Sopenharmony_ci ->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories 1178c2ecf20Sopenharmony_ci involved. 1188c2ecf20Sopenharmony_ci ->rename() has ->i_rwsem exclusive on any subdirectory that changes parent. 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ciSee Documentation/filesystems/directory-locking.rst for more detailed discussion 1218c2ecf20Sopenharmony_ciof the locking scheme for directory operations. 
1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cixattr_handler operations 1248c2ecf20Sopenharmony_ci======================== 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ciprototypes:: 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci bool (*list)(struct dentry *dentry); 1298c2ecf20Sopenharmony_ci int (*get)(const struct xattr_handler *handler, struct dentry *dentry, 1308c2ecf20Sopenharmony_ci struct inode *inode, const char *name, void *buffer, 1318c2ecf20Sopenharmony_ci size_t size); 1328c2ecf20Sopenharmony_ci int (*set)(const struct xattr_handler *handler, struct dentry *dentry, 1338c2ecf20Sopenharmony_ci struct inode *inode, const char *name, const void *buffer, 1348c2ecf20Sopenharmony_ci size_t size, int flags); 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cilocking rules: 1378c2ecf20Sopenharmony_ci all may block 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci===== ============== 1408c2ecf20Sopenharmony_ciops i_rwsem(inode) 1418c2ecf20Sopenharmony_ci===== ============== 1428c2ecf20Sopenharmony_cilist: no 1438c2ecf20Sopenharmony_ciget: no 1448c2ecf20Sopenharmony_ciset: exclusive 1458c2ecf20Sopenharmony_ci===== ============== 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_cisuper_operations 1488c2ecf20Sopenharmony_ci================ 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ciprototypes:: 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci struct inode *(*alloc_inode)(struct super_block *sb); 1538c2ecf20Sopenharmony_ci void (*free_inode)(struct inode *); 1548c2ecf20Sopenharmony_ci void (*destroy_inode)(struct inode *); 1558c2ecf20Sopenharmony_ci void (*dirty_inode) (struct inode *, int flags); 1568c2ecf20Sopenharmony_ci int (*write_inode) (struct inode *, struct writeback_control *wbc); 1578c2ecf20Sopenharmony_ci int (*drop_inode) (struct inode *); 1588c2ecf20Sopenharmony_ci void (*evict_inode) (struct inode *); 1598c2ecf20Sopenharmony_ci void (*put_super) (struct super_block *); 1608c2ecf20Sopenharmony_ci int 
(*sync_fs)(struct super_block *sb, int wait); 1618c2ecf20Sopenharmony_ci int (*freeze_fs) (struct super_block *); 1628c2ecf20Sopenharmony_ci int (*unfreeze_fs) (struct super_block *); 1638c2ecf20Sopenharmony_ci int (*statfs) (struct dentry *, struct kstatfs *); 1648c2ecf20Sopenharmony_ci int (*remount_fs) (struct super_block *, int *, char *); 1658c2ecf20Sopenharmony_ci void (*umount_begin) (struct super_block *); 1668c2ecf20Sopenharmony_ci int (*show_options)(struct seq_file *, struct dentry *); 1678c2ecf20Sopenharmony_ci ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1688c2ecf20Sopenharmony_ci ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1698c2ecf20Sopenharmony_ci int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_cilocking rules: 1728c2ecf20Sopenharmony_ci All may block [not true, see below] 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci====================== ============ ======================== 1758c2ecf20Sopenharmony_ciops s_umount note 1768c2ecf20Sopenharmony_ci====================== ============ ======================== 1778c2ecf20Sopenharmony_cialloc_inode: 1788c2ecf20Sopenharmony_cifree_inode: called from RCU callback 1798c2ecf20Sopenharmony_cidestroy_inode: 1808c2ecf20Sopenharmony_cidirty_inode: 1818c2ecf20Sopenharmony_ciwrite_inode: 1828c2ecf20Sopenharmony_cidrop_inode: !!!inode->i_lock!!! 
1838c2ecf20Sopenharmony_cievict_inode: 1848c2ecf20Sopenharmony_ciput_super: write 1858c2ecf20Sopenharmony_cisync_fs: read 1868c2ecf20Sopenharmony_cifreeze_fs: write 1878c2ecf20Sopenharmony_ciunfreeze_fs: write 1888c2ecf20Sopenharmony_cistatfs: maybe(read) (see below) 1898c2ecf20Sopenharmony_ciremount_fs: write 1908c2ecf20Sopenharmony_ciumount_begin: no 1918c2ecf20Sopenharmony_cishow_options: no (namespace_sem) 1928c2ecf20Sopenharmony_ciquota_read: no (see below) 1938c2ecf20Sopenharmony_ciquota_write: no (see below) 1948c2ecf20Sopenharmony_cibdev_try_to_free_page: no (see below) 1958c2ecf20Sopenharmony_ci====================== ============ ======================== 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci->statfs() has s_umount (shared) when called by ustat(2) (native or 1988c2ecf20Sopenharmony_cicompat), but that's an accident of bad API; s_umount is used to pin 1998c2ecf20Sopenharmony_cithe superblock down when we only have dev_t given us by userland to 2008c2ecf20Sopenharmony_ciidentify the superblock. Everything else (statfs(), fstatfs(), etc.) 2018c2ecf20Sopenharmony_cidoesn't hold it when calling ->statfs() - superblock is pinned down 2028c2ecf20Sopenharmony_ciby resolving the pathname passed to syscall. 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci->quota_read() and ->quota_write() functions are both guaranteed to 2058c2ecf20Sopenharmony_cibe the only ones operating on the quota file by the quota code (via 2068c2ecf20Sopenharmony_cidqio_sem) (unless an admin really wants to screw up something and 2078c2ecf20Sopenharmony_ciwrites to quota files with quotas on). For other details about locking 2088c2ecf20Sopenharmony_cisee also dquot_operations section. 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci->bdev_try_to_free_page is called from the ->releasepage handler of 2118c2ecf20Sopenharmony_cithe block device inode. See there for more details. 
2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cifile_system_type 2148c2ecf20Sopenharmony_ci================ 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ciprototypes:: 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci struct dentry *(*mount) (struct file_system_type *, int, 2198c2ecf20Sopenharmony_ci const char *, void *); 2208c2ecf20Sopenharmony_ci void (*kill_sb) (struct super_block *); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cilocking rules: 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci======= ========= 2258c2ecf20Sopenharmony_ciops may block 2268c2ecf20Sopenharmony_ci======= ========= 2278c2ecf20Sopenharmony_cimount yes 2288c2ecf20Sopenharmony_cikill_sb yes 2298c2ecf20Sopenharmony_ci======= ========= 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci->mount() returns ERR_PTR or the root dentry; its superblock should be locked 2328c2ecf20Sopenharmony_cion return. 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci->kill_sb() takes a write-locked superblock, does all shutdown work on it, 2358c2ecf20Sopenharmony_ciunlocks and drops the reference. 
2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ciaddress_space_operations 2388c2ecf20Sopenharmony_ci======================== 2398c2ecf20Sopenharmony_ciprototypes:: 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci int (*writepage)(struct page *page, struct writeback_control *wbc); 2428c2ecf20Sopenharmony_ci int (*readpage)(struct file *, struct page *); 2438c2ecf20Sopenharmony_ci int (*writepages)(struct address_space *, struct writeback_control *); 2448c2ecf20Sopenharmony_ci int (*set_page_dirty)(struct page *page); 2458c2ecf20Sopenharmony_ci void (*readahead)(struct readahead_control *); 2468c2ecf20Sopenharmony_ci int (*readpages)(struct file *filp, struct address_space *mapping, 2478c2ecf20Sopenharmony_ci struct list_head *pages, unsigned nr_pages); 2488c2ecf20Sopenharmony_ci int (*write_begin)(struct file *, struct address_space *mapping, 2498c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned flags, 2508c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata); 2518c2ecf20Sopenharmony_ci int (*write_end)(struct file *, struct address_space *mapping, 2528c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 2538c2ecf20Sopenharmony_ci struct page *page, void *fsdata); 2548c2ecf20Sopenharmony_ci sector_t (*bmap)(struct address_space *, sector_t); 2558c2ecf20Sopenharmony_ci void (*invalidatepage) (struct page *, unsigned int, unsigned int); 2568c2ecf20Sopenharmony_ci int (*releasepage) (struct page *, int); 2578c2ecf20Sopenharmony_ci void (*freepage)(struct page *); 2588c2ecf20Sopenharmony_ci int (*direct_IO)(struct kiocb *, struct iov_iter *iter); 2598c2ecf20Sopenharmony_ci bool (*isolate_page) (struct page *, isolate_mode_t); 2608c2ecf20Sopenharmony_ci int (*migratepage)(struct address_space *, struct page *, struct page *); 2618c2ecf20Sopenharmony_ci void (*putback_page) (struct page *); 2628c2ecf20Sopenharmony_ci int (*launder_page)(struct page *); 2638c2ecf20Sopenharmony_ci int (*is_partially_uptodate)(struct page *, unsigned long, 
unsigned long); 2648c2ecf20Sopenharmony_ci int (*error_remove_page)(struct address_space *, struct page *); 2658c2ecf20Sopenharmony_ci int (*swap_activate)(struct file *); 2668c2ecf20Sopenharmony_ci int (*swap_deactivate)(struct file *); 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_cilocking rules: 2698c2ecf20Sopenharmony_ci All except set_page_dirty and freepage may block 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci====================== ======================== ========= 2728c2ecf20Sopenharmony_ciops PageLocked(page) i_rwsem 2738c2ecf20Sopenharmony_ci====================== ======================== ========= 2748c2ecf20Sopenharmony_ciwritepage: yes, unlocks (see below) 2758c2ecf20Sopenharmony_cireadpage: yes, unlocks 2768c2ecf20Sopenharmony_ciwritepages: 2778c2ecf20Sopenharmony_ciset_page_dirty no 2788c2ecf20Sopenharmony_cireadahead: yes, unlocks 2798c2ecf20Sopenharmony_cireadpages: no 2808c2ecf20Sopenharmony_ciwrite_begin: locks the page exclusive 2818c2ecf20Sopenharmony_ciwrite_end: yes, unlocks exclusive 2828c2ecf20Sopenharmony_cibmap: 2838c2ecf20Sopenharmony_ciinvalidatepage: yes 2848c2ecf20Sopenharmony_cireleasepage: yes 2858c2ecf20Sopenharmony_cifreepage: yes 2868c2ecf20Sopenharmony_cidirect_IO: 2878c2ecf20Sopenharmony_ciisolate_page: yes 2888c2ecf20Sopenharmony_cimigratepage: yes (both) 2898c2ecf20Sopenharmony_ciputback_page: yes 2908c2ecf20Sopenharmony_cilaunder_page: yes 2918c2ecf20Sopenharmony_ciis_partially_uptodate: yes 2928c2ecf20Sopenharmony_cierror_remove_page: yes 2938c2ecf20Sopenharmony_ciswap_activate: no 2948c2ecf20Sopenharmony_ciswap_deactivate: no 2958c2ecf20Sopenharmony_ci====================== ======================== ========= 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci->write_begin(), ->write_end() and ->readpage() may be called from 2988c2ecf20Sopenharmony_cithe request handler (/dev/loop). 
2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci->readpage() unlocks the page, either synchronously or via I/O 3018c2ecf20Sopenharmony_cicompletion. 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci->readahead() unlocks the pages that I/O is attempted on like ->readpage(). 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci->readpages() populates the pagecache with the passed pages and starts 3068c2ecf20Sopenharmony_ciI/O against them. They come unlocked upon I/O completion. 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci->writepage() is used for two purposes: for "memory cleansing" and for 3098c2ecf20Sopenharmony_ci"sync". These are quite different operations and the behaviour may differ 3108c2ecf20Sopenharmony_cidepending upon the mode. 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ciIf writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then 3138c2ecf20Sopenharmony_ciit *must* start I/O against the page, even if that would involve 3148c2ecf20Sopenharmony_ciblocking on in-progress I/O. 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ciIf writepage is called for memory cleansing (sync_mode == 3178c2ecf20Sopenharmony_ciWBC_SYNC_NONE) then its role is to get as much writeout underway as 3188c2ecf20Sopenharmony_cipossible. So writepage should try to avoid blocking against 3198c2ecf20Sopenharmony_cicurrently-in-progress I/O. 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ciIf the filesystem is not called for "sync" and it determines that it 3228c2ecf20Sopenharmony_ciwould need to block against in-progress I/O to be able to start new I/O 3238c2ecf20Sopenharmony_ciagainst the page the filesystem should redirty the page with 3248c2ecf20Sopenharmony_ciredirty_page_for_writepage(), then unlock the page and return zero. 3258c2ecf20Sopenharmony_ciThis may also be done to avoid internal deadlocks, but rarely. 
 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ciIf the filesystem is called for sync then it must wait on any 3288c2ecf20Sopenharmony_ciin-progress I/O and then start new I/O. 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ciThe filesystem should unlock the page synchronously, before returning to the 3318c2ecf20Sopenharmony_cicaller, unless ->writepage() returns the special WRITEPAGE_ACTIVATE 3328c2ecf20Sopenharmony_civalue. WRITEPAGE_ACTIVATE means that the page cannot really be written out 3338c2ecf20Sopenharmony_cicurrently, and the VM should stop calling ->writepage() on this page for some 3348c2ecf20Sopenharmony_citime. The VM does this by moving the page to the head of the active list, hence the 3358c2ecf20Sopenharmony_ciname. 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ciUnless the filesystem is going to redirty_page_for_writepage(), unlock the page 3388c2ecf20Sopenharmony_ciand return zero, writepage *must* run set_page_writeback() against the page, 3398c2ecf20Sopenharmony_cifollowed by unlocking it. Once set_page_writeback() has been run against the 3408c2ecf20Sopenharmony_cipage, write I/O can be submitted and the write I/O completion handler must run 3418c2ecf20Sopenharmony_ciend_page_writeback() once the I/O is complete. If no I/O is submitted, the 3428c2ecf20Sopenharmony_cifilesystem must run end_page_writeback() against the page before returning from 3438c2ecf20Sopenharmony_ciwritepage. 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ciThat is: after 2.5.12, pages which are under writeout are *not* locked. Note, 3468c2ecf20Sopenharmony_ciif the filesystem needs the page to be locked during writeout, that is OK too: 3478c2ecf20Sopenharmony_cithe page is allowed to be unlocked at any point in time between the calls to 3488c2ecf20Sopenharmony_ciset_page_writeback() and end_page_writeback(). 
3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ciNote, failure to run either redirty_page_for_writepage() or the combination of 3518c2ecf20Sopenharmony_ciset_page_writeback()/end_page_writeback() on a page submitted to writepage 3528c2ecf20Sopenharmony_ciwill leave the page itself marked clean but it will be tagged as dirty in the 3538c2ecf20Sopenharmony_ciradix tree. This incoherency can lead to all sorts of hard-to-debug problems 3548c2ecf20Sopenharmony_ciin the filesystem like having dirty inodes at umount and losing written data. 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci->writepages() is used for periodic writeback and for syscall-initiated 3578c2ecf20Sopenharmony_cisync operations. The address_space should start I/O against at least 3588c2ecf20Sopenharmony_ci``*nr_to_write`` pages. ``*nr_to_write`` must be decremented for each page 3598c2ecf20Sopenharmony_ciwhich is written. The address_space implementation may write more (or less) 3608c2ecf20Sopenharmony_cipages than ``*nr_to_write`` asks for, but it should try to be reasonably close. 3618c2ecf20Sopenharmony_ciIf nr_to_write is NULL, all dirty pages must be written. 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ciwritepages should _only_ write pages which are present on 3648c2ecf20Sopenharmony_cimapping->io_pages. 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci->set_page_dirty() is called from various places in the kernel 3678c2ecf20Sopenharmony_ciwhen the target page is marked as needing writeback. It may be called 3688c2ecf20Sopenharmony_ciunder spinlock (it cannot block) and is sometimes called with the page 3698c2ecf20Sopenharmony_cinot locked. 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some 3728c2ecf20Sopenharmony_cifilesystems and by the swapper. The latter will eventually go away. Please, 3738c2ecf20Sopenharmony_cikeep it that way and don't breed new callers. 
3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci->invalidatepage() is called when the filesystem must attempt to drop 3768c2ecf20Sopenharmony_cisome or all of the buffers from the page when it is being truncated. It 3778c2ecf20Sopenharmony_cireturns zero on success. If ->invalidatepage is zero, the kernel uses 3788c2ecf20Sopenharmony_ciblock_invalidatepage() instead. 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci->releasepage() is called when the kernel is about to try to drop the 3818c2ecf20Sopenharmony_cibuffers from the page in preparation for freeing it. It returns zero to 3828c2ecf20Sopenharmony_ciindicate that the buffers are (or may be) freeable. If ->releasepage is zero, 3838c2ecf20Sopenharmony_cithe kernel assumes that the fs has no private interest in the buffers. 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci->freepage() is called when the kernel is done dropping the page 3868c2ecf20Sopenharmony_cifrom the page cache. 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci->launder_page() may be called prior to releasing a page if 3898c2ecf20Sopenharmony_ciit is still found to be dirty. It returns zero if the page was successfully 3908c2ecf20Sopenharmony_cicleaned, or an error value if not. Note that in order to prevent the page 3918c2ecf20Sopenharmony_cigetting mapped back in and redirtied, it needs to be kept locked 3928c2ecf20Sopenharmony_ciacross the entire operation. 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci->swap_activate will be called with a non-zero argument on 3958c2ecf20Sopenharmony_cifiles backing (non block device backed) swapfiles. A return value 3968c2ecf20Sopenharmony_ciof zero indicates success, in which case this file can be used for 3978c2ecf20Sopenharmony_cibacking swapspace. The swapspace operations will be proxied to the 3988c2ecf20Sopenharmony_ciaddress space operations. 
 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci->swap_deactivate() will be called in the sys_swapoff() 4018c2ecf20Sopenharmony_cipath after ->swap_activate() has returned success. 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_cifile_lock_operations 4048c2ecf20Sopenharmony_ci==================== 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ciprototypes:: 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 4098c2ecf20Sopenharmony_ci void (*fl_release_private)(struct file_lock *); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_cilocking rules: 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci=================== ============= ========= 4158c2ecf20Sopenharmony_ciops inode->i_lock may block 4168c2ecf20Sopenharmony_ci=================== ============= ========= 4178c2ecf20Sopenharmony_cifl_copy_lock: yes no 4188c2ecf20Sopenharmony_cifl_release_private: maybe maybe [1]_ 4198c2ecf20Sopenharmony_ci=================== ============= ========= 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_ci.. [1] 4228c2ecf20Sopenharmony_ci ->fl_release_private for flock or POSIX locks is currently allowed 4238c2ecf20Sopenharmony_ci to block. Leases, however, can still be freed while the i_lock is held and 4248c2ecf20Sopenharmony_ci so fl_release_private called on a lease should not block. 
 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_cilock_manager_operations 4278c2ecf20Sopenharmony_ci======================= 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ciprototypes:: 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci void (*lm_notify)(struct file_lock *); /* unblock callback */ 4328c2ecf20Sopenharmony_ci int (*lm_grant)(struct file_lock *, struct file_lock *, int); 4338c2ecf20Sopenharmony_ci void (*lm_break)(struct file_lock *); /* break_lease callback */ 4348c2ecf20Sopenharmony_ci int (*lm_change)(struct file_lock **, int); 4358c2ecf20Sopenharmony_ci bool (*lm_breaker_owns_lease)(struct file_lock *); 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_cilocking rules: 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci====================== ============= ================= ========= 4408c2ecf20Sopenharmony_ciops inode->i_lock blocked_lock_lock may block 4418c2ecf20Sopenharmony_ci====================== ============= ================= ========= 4428c2ecf20Sopenharmony_cilm_notify: yes yes no 4438c2ecf20Sopenharmony_cilm_grant: no no no 4448c2ecf20Sopenharmony_cilm_break: yes no no 4458c2ecf20Sopenharmony_cilm_change: yes no no 4468c2ecf20Sopenharmony_cilm_breaker_owns_lease: no no no 4478c2ecf20Sopenharmony_ci====================== ============= ================= ========= 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_cibuffer_head 4508c2ecf20Sopenharmony_ci=========== 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ciprototypes:: 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci void (*b_end_io)(struct buffer_head *bh, int uptodate); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_cilocking rules: 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ciCalled from interrupts. In other words, extreme care is needed here. 4598c2ecf20Sopenharmony_cibh is locked, but that is the only guarantee we have here. Currently only RAID1, 4608c2ecf20Sopenharmony_cihighmem, fs/buffer.c, and fs/ntfs/aops.c provide these. 
Block devices 4618c2ecf20Sopenharmony_cicall this method upon the IO completion. 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ciblock_device_operations 4648c2ecf20Sopenharmony_ci======================= 4658c2ecf20Sopenharmony_ciprototypes:: 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci int (*open) (struct block_device *, fmode_t); 4688c2ecf20Sopenharmony_ci int (*release) (struct gendisk *, fmode_t); 4698c2ecf20Sopenharmony_ci int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 4708c2ecf20Sopenharmony_ci int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 4718c2ecf20Sopenharmony_ci int (*direct_access) (struct block_device *, sector_t, void **, 4728c2ecf20Sopenharmony_ci unsigned long *); 4738c2ecf20Sopenharmony_ci void (*unlock_native_capacity) (struct gendisk *); 4748c2ecf20Sopenharmony_ci int (*revalidate_disk) (struct gendisk *); 4758c2ecf20Sopenharmony_ci int (*getgeo)(struct block_device *, struct hd_geometry *); 4768c2ecf20Sopenharmony_ci void (*swap_slot_free_notify) (struct block_device *, unsigned long); 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_cilocking rules: 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci======================= =================== 4818c2ecf20Sopenharmony_ciops bd_mutex 4828c2ecf20Sopenharmony_ci======================= =================== 4838c2ecf20Sopenharmony_ciopen: yes 4848c2ecf20Sopenharmony_cirelease: yes 4858c2ecf20Sopenharmony_ciioctl: no 4868c2ecf20Sopenharmony_cicompat_ioctl: no 4878c2ecf20Sopenharmony_cidirect_access: no 4888c2ecf20Sopenharmony_ciunlock_native_capacity: no 4898c2ecf20Sopenharmony_cirevalidate_disk: no 4908c2ecf20Sopenharmony_cigetgeo: no 4918c2ecf20Sopenharmony_ciswap_slot_free_notify: no (see below) 4928c2ecf20Sopenharmony_ci======================= =================== 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ciswap_slot_free_notify is called with swap_lock and sometimes the page lock 4958c2ecf20Sopenharmony_ciheld. 
4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_cifile_operations 4998c2ecf20Sopenharmony_ci=============== 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ciprototypes:: 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci loff_t (*llseek) (struct file *, loff_t, int); 5048c2ecf20Sopenharmony_ci ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 5058c2ecf20Sopenharmony_ci ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 5068c2ecf20Sopenharmony_ci ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); 5078c2ecf20Sopenharmony_ci ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); 5088c2ecf20Sopenharmony_ci int (*iterate) (struct file *, struct dir_context *); 5098c2ecf20Sopenharmony_ci int (*iterate_shared) (struct file *, struct dir_context *); 5108c2ecf20Sopenharmony_ci __poll_t (*poll) (struct file *, struct poll_table_struct *); 5118c2ecf20Sopenharmony_ci long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 5128c2ecf20Sopenharmony_ci long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 5138c2ecf20Sopenharmony_ci int (*mmap) (struct file *, struct vm_area_struct *); 5148c2ecf20Sopenharmony_ci int (*open) (struct inode *, struct file *); 5158c2ecf20Sopenharmony_ci int (*flush) (struct file *); 5168c2ecf20Sopenharmony_ci int (*release) (struct inode *, struct file *); 5178c2ecf20Sopenharmony_ci int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); 5188c2ecf20Sopenharmony_ci int (*fasync) (int, struct file *, int); 5198c2ecf20Sopenharmony_ci int (*lock) (struct file *, int, struct file_lock *); 5208c2ecf20Sopenharmony_ci ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, 5218c2ecf20Sopenharmony_ci loff_t *); 5228c2ecf20Sopenharmony_ci ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, 5238c2ecf20Sopenharmony_ci loff_t *); 5248c2ecf20Sopenharmony_ci ssize_t (*sendfile) (struct file *, loff_t *, size_t, 
read_actor_t, 5258c2ecf20Sopenharmony_ci void __user *); 5268c2ecf20Sopenharmony_ci ssize_t (*sendpage) (struct file *, struct page *, int, size_t, 5278c2ecf20Sopenharmony_ci loff_t *, int); 5288c2ecf20Sopenharmony_ci unsigned long (*get_unmapped_area)(struct file *, unsigned long, 5298c2ecf20Sopenharmony_ci unsigned long, unsigned long, unsigned long); 5308c2ecf20Sopenharmony_ci int (*check_flags)(int); 5318c2ecf20Sopenharmony_ci int (*flock) (struct file *, int, struct file_lock *); 5328c2ecf20Sopenharmony_ci ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, 5338c2ecf20Sopenharmony_ci size_t, unsigned int); 5348c2ecf20Sopenharmony_ci ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, 5358c2ecf20Sopenharmony_ci size_t, unsigned int); 5368c2ecf20Sopenharmony_ci int (*setlease)(struct file *, long, struct file_lock **, void **); 5378c2ecf20Sopenharmony_ci long (*fallocate)(struct file *, int, loff_t, loff_t); 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_cilocking rules: 5408c2ecf20Sopenharmony_ci All may block. 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci->llseek() locking has moved from llseek to the individual llseek 5438c2ecf20Sopenharmony_ciimplementations. If your fs is not using generic_file_llseek, you 5448c2ecf20Sopenharmony_cineed to acquire and release the appropriate locks in your ->llseek(). 5458c2ecf20Sopenharmony_ciFor many filesystems, it is probably safe to acquire the inode 5468c2ecf20Sopenharmony_cimutex or just to use i_size_read() instead. 5478c2ecf20Sopenharmony_ciNote: this does not protect the file->f_pos against concurrent modifications 5488c2ecf20Sopenharmony_cisince that is something userspace has to take care of. 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci->iterate() is called with i_rwsem exclusive. 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_ci->iterate_shared() is called with i_rwsem at least shared. 
5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. 5558c2ecf20Sopenharmony_ciMost instances call fasync_helper(), which does that maintenance, so it's 5568c2ecf20Sopenharmony_cinot normally something one needs to worry about. Return values > 0 will be 5578c2ecf20Sopenharmony_cimapped to zero in the VFS layer. 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci->readdir() and ->ioctl() on directories must be changed. Ideally we would 5608c2ecf20Sopenharmony_cimove ->readdir() to inode_operations and use a separate method for directory 5618c2ecf20Sopenharmony_ci->ioctl() or kill the latter completely. One of the problems is that for 5628c2ecf20Sopenharmony_cianything that resembles union-mount we won't have a struct file for all 5638c2ecf20Sopenharmony_cicomponents. And there are other reasons why the current interface is a mess... 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci->read on directories probably must go away - we should just enforce -EISDIR 5668c2ecf20Sopenharmony_ciin sys_read() and friends. 
5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci->setlease operations should call generic_setlease() before or after setting 5698c2ecf20Sopenharmony_cithe lease within the individual filesystem to record the result of the 5708c2ecf20Sopenharmony_cioperation. 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_cidquot_operations 5738c2ecf20Sopenharmony_ci================ 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ciprototypes:: 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci int (*write_dquot) (struct dquot *); 5788c2ecf20Sopenharmony_ci int (*acquire_dquot) (struct dquot *); 5798c2ecf20Sopenharmony_ci int (*release_dquot) (struct dquot *); 5808c2ecf20Sopenharmony_ci int (*mark_dirty) (struct dquot *); 5818c2ecf20Sopenharmony_ci int (*write_info) (struct super_block *, int); 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ciThese operations are intended to be more or less wrapping functions that ensure 5848c2ecf20Sopenharmony_ciproper locking with respect to the filesystem and call the generic quota operations. 5858c2ecf20Sopenharmony_ci 5868c2ecf20Sopenharmony_ciWhat a filesystem should expect from the generic quota functions: 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci============== ============ ========================= 5898c2ecf20Sopenharmony_ciops FS recursion Held locks when called 5908c2ecf20Sopenharmony_ci============== ============ ========================= 5918c2ecf20Sopenharmony_ciwrite_dquot: yes dqonoff_sem or dqptr_sem 5928c2ecf20Sopenharmony_ciacquire_dquot: yes dqonoff_sem or dqptr_sem 5938c2ecf20Sopenharmony_cirelease_dquot: yes dqonoff_sem or dqptr_sem 5948c2ecf20Sopenharmony_cimark_dirty: no - 5958c2ecf20Sopenharmony_ciwrite_info: yes dqonoff_sem 5968c2ecf20Sopenharmony_ci============== ============ ========================= 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ciFS recursion means calling ->quota_read() and ->quota_write() from superblock 5998c2ecf20Sopenharmony_cioperations. 
6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ciMore details about quota locking can be found in fs/quota/dquot.c. 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_civm_operations_struct 6048c2ecf20Sopenharmony_ci==================== 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ciprototypes:: 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci void (*open)(struct vm_area_struct*); 6098c2ecf20Sopenharmony_ci void (*close)(struct vm_area_struct*); 6108c2ecf20Sopenharmony_ci vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *); 6118c2ecf20Sopenharmony_ci vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); 6128c2ecf20Sopenharmony_ci vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *); 6138c2ecf20Sopenharmony_ci int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_cilocking rules: 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci============= ========= =========================== 6188c2ecf20Sopenharmony_ciops mmap_lock PageLocked(page) 6198c2ecf20Sopenharmony_ci============= ========= =========================== 6208c2ecf20Sopenharmony_ciopen: yes 6218c2ecf20Sopenharmony_ciclose: yes 6228c2ecf20Sopenharmony_cifault: yes can return with page locked 6238c2ecf20Sopenharmony_cimap_pages: yes 6248c2ecf20Sopenharmony_cipage_mkwrite: yes can return with page locked 6258c2ecf20Sopenharmony_cipfn_mkwrite: yes 6268c2ecf20Sopenharmony_ciaccess: yes 6278c2ecf20Sopenharmony_ci============= ========= =========================== 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci->fault() is called when a previously not-present pte is about 6308c2ecf20Sopenharmony_cito be faulted in. The filesystem must find and return the page associated 6318c2ecf20Sopenharmony_ciwith the passed-in "pgoff" in the vm_fault structure. 
If it is possible that 6328c2ecf20Sopenharmony_cithe page may be truncated and/or invalidated, then the filesystem must lock 6338c2ecf20Sopenharmony_cithe page, then ensure it is not already truncated (the page lock will block 6348c2ecf20Sopenharmony_cisubsequent truncate), and then return with VM_FAULT_LOCKED, and the page 6358c2ecf20Sopenharmony_cilocked. The VM will unlock the page. 6368c2ecf20Sopenharmony_ci 6378c2ecf20Sopenharmony_ci->map_pages() is called when the VM asks to map easily accessible pages. 6388c2ecf20Sopenharmony_ciThe filesystem should find and map pages associated with offsets from "start_pgoff" 6398c2ecf20Sopenharmony_citill "end_pgoff". ->map_pages() is called with the page table locked and must 6408c2ecf20Sopenharmony_cinot block. If it's not possible to reach a page without blocking, 6418c2ecf20Sopenharmony_cithe filesystem should skip it. The filesystem should use do_set_pte() to set up 6428c2ecf20Sopenharmony_cithe page table entry. A pointer to the entry associated with the page is passed in 6438c2ecf20Sopenharmony_cithe "pte" field of the vm_fault structure. Pointers to entries for other offsets 6448c2ecf20Sopenharmony_cishould be calculated relative to "pte". 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci->page_mkwrite() is called when a previously read-only pte is 6478c2ecf20Sopenharmony_ciabout to become writeable. The filesystem again must ensure that there are 6488c2ecf20Sopenharmony_cino truncate/invalidate races, and then return with the page locked. If 6498c2ecf20Sopenharmony_cithe page has been truncated, the filesystem should not look up a new page 6508c2ecf20Sopenharmony_cilike the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which 6518c2ecf20Sopenharmony_ciwill cause the VM to retry the fault. 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci->pfn_mkwrite() is the same as ->page_mkwrite(), but is called when the pte is 6548c2ecf20Sopenharmony_ciVM_PFNMAP or VM_MIXEDMAP with a page-less entry. The expected return is 6558c2ecf20Sopenharmony_ciVM_FAULT_NOPAGE, 
or one of the VM_FAULT_ERROR types. The default behavior 6568c2ecf20Sopenharmony_ciafter this call is to make the pte read-write, unless pfn_mkwrite returns 6578c2ecf20Sopenharmony_cian error. 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci->access() is called when get_user_pages() fails in 6608c2ecf20Sopenharmony_ciaccess_process_vm(), typically used to debug a process through 6618c2ecf20Sopenharmony_ci/proc/pid/mem or ptrace. This function is needed only for 6628c2ecf20Sopenharmony_ciVM_IO | VM_PFNMAP VMAs. 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci-------------------------------------------------------------------------------- 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci Dubious stuff 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci(if you break something or notice that it is broken and do not fix it yourself 6698c2ecf20Sopenharmony_ci- at least put it here) 670