/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/hmdfs/inode.h
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#ifndef INODE_H
#define INODE_H

#include "hmdfs.h"

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
#include <linux/iversion.h>
#endif

enum {
	HMDFS_REMOTE_INODE_NONE = 0,
	HMDFS_REMOTE_INODE_STASHING,
	HMDFS_REMOTE_INODE_RESTORING,
};

/*****************************************************************************
 * fid
 *****************************************************************************/

/* Bits for fid_flags */
enum {
	HMDFS_FID_NEED_OPEN = 0,
	HMDFS_FID_OPENING,
};

struct hmdfs_fid {
	__u64 ver;
	__u32 id;
};

/*
 * A cache file is stored in the following on-disk format:
 *  ________________________________________________________________
 * |meta file info| remote file(s) path |        file content       |
 * |     head     |        path         |            data           |
 *                ↑                     ↑
 *             path_offs             data_offs
 */
struct hmdfs_cache_info {
	/* Path start offset in the file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 path_offs;
	__u32 path_len;
	__u32 path_cnt;
	char *path_buf;
	/* Paths of the remote file and its hardlinks, separated by '\0' */
	char *path;
	/* Data start offset in the file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 data_offs;
	/* # of pages that need to be written to the remote file during offline */
	atomic64_t to_write_pgs;
	/* # of pages written to the remote file during offline */
	atomic64_t written_pgs;
	/* Stash file handle */
	struct file *cache_file;
};
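
/*
 * Illustrative offset math for the layout above (a sketch under the
 * assumption that both offsets are byte offsets and that the meta info
 * head has some fixed size; `stash_head_size` is a hypothetical name):
 *
 *	path_offs = ALIGN(stash_head_size, HMDFS_STASH_BLK_SIZE);
 *	data_offs = ALIGN(path_offs + path_len, HMDFS_STASH_BLK_SIZE);
 *
 * so the stashed file content always starts on a stash-block boundary.
 */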

/*****************************************************************************
 * inode info and its inline helpers
 *****************************************************************************/

struct hmdfs_inode_info {
	struct inode *lower_inode; // for local/merge inode
	struct hmdfs_peer *conn;   // for remote inode
	struct kref ref;
	spinlock_t fid_lock;
	struct hmdfs_fid fid;
	unsigned long fid_flags;
	wait_queue_head_t fid_wq;
	__u8 inode_type; // deprecated: use ino system instead
	atomic_t write_opened;

	/* writeback list */
	struct list_head wb_list;

#ifdef CONFIG_HMDFS_FS_PERMISSION
	__u16 perm;
#endif
	/*
	 * Looking up a remote file generates a local inode; in that case
	 * this field stores the combination of the remote inode number and
	 * generation, so that the uniqueness of the local inode can be
	 * determined.
	 */
	__u64 remote_ino;
#define CLOUD_RECORD_ID_LEN            33
	__u8 cloud_record_id[CLOUD_RECORD_ID_LEN];
#define CLOUD_DENTRY_RESERVED_LENGTH   3
	__u8 reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	/*
	 * If this value is not ULLONG_MAX, a getattr call on the remote
	 * inode should return it as the inode size.
	 */
	__u64 getattr_isize;
	/*
	 * Stores the remote ctime, set explicitly when the remote file is
	 * opened.
	 */
	struct hmdfs_time_t remote_ctime;
	/*
	 * Stores the last time, aligned to dcache_precision, that the
	 * remote file was modified. Note that this value has no effect if
	 * writecache_expire is set.
	 */
	struct hmdfs_time_t stable_ctime;
	/*
	 * If this value is nonzero, the pagecache should be truncated when
	 * the file is opened later than this expiry time. Furthermore,
	 * stable_ctime then has no effect.
	 */
	unsigned long writecache_expire;
	/*
	 * Records how many times the file has been written while it is
	 * open. 'writecache_expire' is set on close if this value is
	 * nonzero.
	 */
	atomic64_t write_counter;
	/*
	 * Linked into hmdfs_peer::wr_opened_inode_list when the remote
	 * inode is opened for writing; wr_opened_cnt tracks possibly
	 * multiple writable opens.
	 */
	struct list_head wr_opened_node;
	atomic_t wr_opened_cnt;
	spinlock_t stash_lock;
	unsigned int stash_status;
	struct hmdfs_cache_info *cache;
	/* Linked into hmdfs_peer::stashed_inode_list when stashing completes */
	struct list_head stash_node;
	/*
	 * The flush/fsync thread holds the write lock, while threads
	 * calling writepage hold the read lock. We use this rwsem to
	 * eliminate cases where a flush/fsync operation completes while
	 * re-dirtied pages remain dirty.
	 *
	 * Here is the explanation in detail:
	 *
	 * During `writepage()`, a re-dirtied page goes through the
	 * following states in sequence:
	 * s1: page dirty + tree dirty
	 * s2: page dirty + tree dirty <tag_pages_for_writeback>
	 * s3: page clean + tree dirty <clear_page_dirty_for_io>
	 * s4: page clean + tree clean + writeback <set_page_writeback>
	 * s5: page dirty + tree dirty + writeback <redirty_page_for_writepage>
	 * s6: page dirty + tree dirty <end_page_writeback>
	 *
	 * A page in s4 will thus be ignored by a concurrent
	 * `do_writepages()` issued by `close()` or `fsync()`, making its
	 * state inconsistent.
	 *
	 * To avoid this situation, we use a per-file rwsem to prevent
	 * in-flight `writepage` from running concurrently with `close()`
	 * or `fsync()`.
	 *
	 * The overhead is minimal, since the read semaphore still allows
	 * concurrent `writepage` calls, while it is natural for `close()`
	 * or `fsync()` to wait for in-flight `writepage()`s to complete.
	 *
	 * NOTE that in the worst case, a process may wait on the write
	 * semaphore for TIMEOUT even if a signal is pending. But we have
	 * to wait there to iterate all pages and make sure that no dirty
	 * page remains.
	 */
	struct rw_semaphore wpage_sem;

	// The real inode shared with the VFS. ALWAYS PUT IT AT THE BOTTOM.
	struct inode vfs_inode;
};
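
/*
 * A minimal sketch of the wpage_sem scheme described above (illustrative
 * only; the real callers live in the hmdfs .c files):
 *
 *	writepage side:			flush/fsync side:
 *	down_read(&info->wpage_sem);	down_write(&info->wpage_sem);
 *	... set_page_writeback() ...	... do_writepages() ...
 *	up_read(&info->wpage_sem);	up_write(&info->wpage_sem);
 *
 * Readers (writepage) may run concurrently with each other; the writer
 * waits for every in-flight writepage(), so no re-dirtied page can sit
 * in state s4 while flush/fsync iterates the page tree.
 */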

struct hmdfs_readdir_work {
	struct list_head head;
	struct dentry *dentry;
	struct hmdfs_peer *con;
	struct delayed_work dwork;
};

static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
{
	return container_of(inode, struct hmdfs_inode_info, vfs_inode);
}
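
/*
 * Typical use (illustrative): a VFS callback receives the embedded
 * vfs_inode and recovers the containing hmdfs_inode_info via the
 * standard container_of() pattern, e.g.
 *
 *	struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
 *
 * This works because vfs_inode is embedded at the bottom of
 * hmdfs_inode_info (see the struct above).
 */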

static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
{
	const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);

	/* Refer to comments in hmdfs_stash_remote_inode() */
	return (hmdfs_is_stash_enabled(sbi) &&
		smp_load_acquire(&info->stash_status)); // pairs with a release store
}
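
/*
 * The acquire load above implies that the setter publishes stash_status
 * with release semantics. A sketch of what such a transition could look
 * like (illustrative only; the real transition is documented in
 * hmdfs_stash_remote_inode(), as referenced above):
 *
 *	spin_lock(&info->stash_lock);
 *	// ... set up info->cache ...
 *	smp_store_release(&info->stash_status, HMDFS_REMOTE_INODE_STASHING);
 *	spin_unlock(&info->stash_lock);
 */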

static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
					  struct hmdfs_fid *fid)
{
	spin_lock(&info->fid_lock);
	*fid = info->fid;
	spin_unlock(&info->fid_lock);
}
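
/*
 * The update side is expected to take the same lock so that ver and id
 * stay consistent with each other. A minimal sketch of a hypothetical
 * updater (the real one lives in the hmdfs .c files):
 *
 *	static void hmdfs_remote_store_fid(struct hmdfs_inode_info *info,
 *					   const struct hmdfs_fid *fid)
 *	{
 *		spin_lock(&info->fid_lock);
 *		info->fid = *fid;
 *		spin_unlock(&info->fid_lock);
 *	}
 */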

/*****************************************************************************
 * ino allocator
 *****************************************************************************/

enum HMDFS_ROOT {
	HMDFS_ROOT_ANCESTOR = 1, // /
	HMDFS_ROOT_DEV,		 // /device_view
	HMDFS_ROOT_DEV_LOCAL,	 // /device_view/local
	HMDFS_ROOT_DEV_REMOTE,	 // /device_view/remote
	HMDFS_ROOT_DEV_CLOUD,	 // /device_view/cloud
	HMDFS_ROOT_MERGE,	 // /merge_view
	HMDFS_ROOT_MERGE_CLOUD,	 // /cloud_merge_view

	HMDFS_ROOT_INVALID,
};

// directory layer, not overlay layer
enum HMDFS_LAYER_TYPE {
	HMDFS_LAYER_ZERO = 0,	   // /
	HMDFS_LAYER_FIRST_DEVICE,  // /device_view
	HMDFS_LAYER_SECOND_LOCAL,  // /device_view/local
	HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
	HMDFS_LAYER_SECOND_CLOUD,  // /device_view/cloud
	HMDFS_LAYER_OTHER_LOCAL,   // /device_view/local/xx
	HMDFS_LAYER_OTHER_REMOTE,  // /device_view/remote/xx
	HMDFS_LAYER_OTHER_CLOUD,   // /device_view/cloud/xx

	HMDFS_LAYER_FIRST_MERGE, // /merge_view
	HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx
	HMDFS_LAYER_FIRST_MERGE_CLOUD, // /cloud_merge_view
	HMDFS_LAYER_OTHER_MERGE_CLOUD, // /cloud_merge_view/xxx
	HMDFS_LAYER_INVALID,
};

struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
				     struct inode *lo_i,
				     struct hmdfs_peer *peer);
struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
				       struct dentry *fst_lo_d);
struct inode *hmdfs_iget5_locked_cloud_merge(struct super_block *sb,
					     struct dentry *fst_lo_d);

struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
				       struct inode *lo_i);
struct hmdfs_peer;
struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
					struct hmdfs_peer *peer,
					uint64_t remote_ino);

struct hmdfs_lookup_cloud_ret {
	uint64_t i_size;
	uint64_t i_mtime;
	uint8_t record_id[CLOUD_RECORD_ID_LEN];
	uint8_t reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	uint16_t i_mode;
};

struct inode *hmdfs_iget5_locked_cloud(struct super_block *sb,
				       struct hmdfs_peer *peer,
				       struct hmdfs_lookup_cloud_ret *res);

void hmdfs_update_upper_file(struct file *upper_file, struct file *lower_file);
uint32_t make_ino_raw_cloud(uint8_t *cloud_id);
#endif // INODE_H