// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#include <linux/version.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

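/*
 * Per-superblock sequence numbers start at HMDFS_SB_SEQ_FROM, so a seq
 * of 0 means "never allocated" and hmdfs_free_sb_seq() can treat it as
 * a no-op.
 */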
static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

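/*
 * xattr requests are dispatched by inode layer type: local inodes
 * forward to the lower filesystem, remote inodes send the request to
 * the owning peer, and merge-view inodes redirect to the local device
 * view.
 */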
static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_get(lower_dentry, name, value, size);
out:
	dput(lower_dentry);
	return err;
}

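/*
 * Only "user.*" attributes are served, and reads are capped at
 * HMDFS_XATTR_SIZE_MAX: a value that does not fit into the capped
 * buffer is reported as -E2BIG instead of -ERANGE.
 */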
static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		res = hmdfs_xattr_merge_get(dentry, name, value, r_size);
	else
		res = -EOPNOTSUPP;

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size over %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	kuid_t tmp_uid;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	tmp_uid = hmdfs_override_inode_uid(d_inode(lower_path.dentry));
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}
	hmdfs_revert_inode_uid(d_inode(lower_path.dentry), tmp_uid);

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

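/*
 * A NULL @value means removexattr(); the local path asserts that the
 * VFS passed XATTR_REPLACE in that case. Values longer than
 * HMDFS_XATTR_SIZE_MAX are rejected up front with -E2BIG.
 */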
static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return -EOPNOTSUPP;
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* the VFS walks this array until it hits a NULL entry */
};

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

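/*
 * Inodes are freed via RCU so that lock-free path walkers still holding
 * an RCU reference never see the memory disappear beneath them.
 */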
static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

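/*
 * Tear down in roughly the reverse order of hmdfs_fill_super(): stop
 * connections and background machinery first, then drop the extra
 * reference on the lower superblock and free the per-sb allocations.
 */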
void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfree(sbi->cloud_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* After all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

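/*
 * Only the hmdfs-private fields are zeroed here: the embedded vfs_inode
 * was initialized by the slab constructor (init_once) and is set up
 * again by the VFS on every allocation.
 */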
static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);

	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

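/*
 * Broadcast statfs to every online peer. The node_lock is dropped around
 * the network call (which may sleep) while a peer reference is held, so
 * the node cannot be freed; the last non-zero return code wins.
 */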
static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -ENOMEM;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -ENOMEM;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

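/*
 * statfs picks its source by view: merge views and the second-level
 * remote view report the local source filesystem, local inodes ask the
 * lower filesystem directly, and everything else is answered by the
 * remote peers.
 */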
static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	/* merge views and device_view report the local source filesystem */
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);
	if (sbi->cloud_dir)
		seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

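/*
 * Only one task executes the heavy syncfs path at a time. Latecomers
 * park on hsi.wait_list; when the executor finishes, it promotes the
 * most recent waiter to run next and aborts the rest, whose dirty data
 * has already been covered by the pass that just completed.
 */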
static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs requests must not run concurrently in hmdfs_sync_fs():
	 * we have to wait until all remote syncfs calls return or time
	 * out, and during that wait @sbi->hsi.wait_count and
	 * @sbi->hsi.remote_ret must be protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * into spinlock protection area to avoid following scenario caused
	 * by out-of-order execution:
	 *
	 *            syncfs                                 syncfs_cb
	 *  sbi->hsi.remote_ret = 0;
	 *  atomic_set(&sbi->hsi.wait_count, 0);
	 *                                               lock
	 *                                               version == old_version
	 *                                 sbi->hsi.remote_ret = resp->ret_code
	 *                                 atomic_dec(&sbi->hsi.wait_count);
	 *                                               unlock
	 *         lock
	 *         version = old_version + 1
	 *         unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count can be assigned
	 * before the spin lock, which may race with syncfs_cb(); keeping
	 * both assignments inside the spinlock-protected region fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that go offline normally. It's okay to drop
		 * them.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There exists a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writing, leading to an incorrect
		 * @sb_dirty_count. The dirty data produced during the
		 * gap period won't be synced in the next syncfs operation.
		 * To avoid this, we have to invoke sync_inodes_sb() again
		 * after getting @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background will make sure @sbi->hsi.wait_count
	 * finally drops to zero whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon syncfs processes in pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: Return syncfs err back to syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

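/*
 * Hash a relative path string. @len is not used: the walk simply stops
 * at the NUL terminator. Characters are folded to lower case when the
 * mount is case-insensitive.
 */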
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

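/*
 * Fill every slot with an all-ones pattern first (presumably marking
 * commands with no explicit timeout as unconfigured), then set the real
 * timeout class for each remote command.
 */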
static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->s_cloud_disk_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	sbi->s_readpages_nr = HMDFS_READPAGES_NR_DEF;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

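/*
 * Keep the caller-supplied destination in @real_dst and rebuild
 * @local_dst as real_dst + UPDATE_LOCAL_DST, bounded by PATH_MAX.
 */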
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random |   boot time(ms) |  0x00 |
 * |--------|-----------------|-------|
 *     16            33          15    (bits)
 *
 * This makes sure the boot cookie is unique over a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

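/*
 * Mount sequence: allocate and initialize the sb_info, parse options,
 * start writeback/stash machinery, register the per-mount sysfs node,
 * resolve the lower path named by @dev_name, and finally build the root
 * inode and dentry on top of it. Failures unwind through the labels at
 * the bottom.
 */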
static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	if (!raw_data)
		return -EINVAL;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	/* Add the ctrl sysfs node, keyed by a hash of the destination path */
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, sbi, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}
	if (sbi->s_cloud_disk_switch)
		err = init_hmdfs_dentry_info(sbi, root_dentry,
					     HMDFS_LAYER_SECOND_LOCAL);
	else
		err = init_hmdfs_dentry_info(sbi, root_dentry,
					     HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_clear_share_table(sbi);
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->cloud_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* Cancel queued works that have not started running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* Wake up async readdir requests that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* Wait for all async readdir work to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the
	 * vfsmount. Thus shrink_dcache_for_umount() will warn about
	 * dentries still in use if async readdir is not done yet.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_destroy_caches;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_unreg_fs;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_exit_configfs;

	hmdfs_message_verify_init();
	return 0;

out_exit_configfs:
	hmdfs_exit_configfs();
out_unreg_fs:
	unregister_filesystem(&hmdfs_fs_type);
out_destroy_caches:
	hmdfs_destroy_caches();
out_err:
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");