// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#include <linux/version.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}
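
/*
 * Illustrative lifecycle of a superblock sequence number: allocation
 * starts at HMDFS_SB_SEQ_FROM (1), so 0 is never handed out and
 * hmdfs_free_sb_seq() can treat it as "never allocated":
 *
 *	int seq = hmdfs_alloc_sb_seq();	// e.g. 1, then 2, ...
 *	if (seq < 0)
 *		return seq;		// IDA error, e.g. -ENOMEM
 *	...
 *	hmdfs_free_sb_seq(seq);		// seq may be handed out again
 */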

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_get(lower_dentry, name, value, size);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		res = hmdfs_xattr_merge_get(dentry, name, value, r_size);
	else
		res = -EOPNOTSUPP;

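	/*
	 * The buffer was clamped to HMDFS_XATTR_SIZE_MAX above, so -ERANGE
	 * here means the stored value is larger than hmdfs supports; report
	 * -E2BIG instead of asking the caller to retry with a bigger buffer.
	 */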
	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size over %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	kuid_t tmp_uid;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	tmp_uid = hmdfs_override_inode_uid(d_inode(lower_path.dentry));
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}
	hmdfs_revert_inode_uid(d_inode(lower_path.dentry), tmp_uid);

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value size too large: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return -EOPNOTSUPP;
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* sentinel: the VFS iterates s_xattr until NULL */
};
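
/*
 * Illustrative usage from userspace (paths are hypothetical): only
 * "user.*" attributes are accepted, and values are limited to
 * HMDFS_XATTR_SIZE_MAX bytes:
 *
 *	setxattr("/mnt/hmdfs/f", "user.tag", "v1", 2, 0);    // forwarded
 *	setxattr("/mnt/hmdfs/f", "trusted.x", "v1", 2, 0);   // -EOPNOTSUPP
 *	getxattr("/mnt/hmdfs/f", "user.tag", buf, sizeof(buf));
 */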

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfree(sbi->cloud_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* Free the sb seq only after all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -ENOMEM;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -ENOMEM;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	// merge_view, merge_view/xxx and device_view report the local src path's statfs
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);
	if (sbi->cloud_dir)
		seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs cannot run concurrently in hmdfs_sync_fs(): we must wait
	 * until every remote syncfs call returns or times out, and during
	 * that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret have to
	 * be protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we put the assignments to @sbi->hsi.remote_ret and
	 * @sbi->hsi.wait_count inside the spinlock-protected area to avoid
	 * the following scenario caused by out-of-order execution:
	 *
	 *            syncfs                                 syncfs_cb
	 *  sbi->hsi.remote_ret = 0;
	 *  atomic_set(&sbi->hsi.wait_count, 0);
	 *                                               lock
	 *                                               version == old_version
	 *                                 sbi->hsi.remote_ret = resp->ret_code
	 *                                 atomic_dec(&sbi->hsi.wait_count);
	 *                                               unlock
	 *         lock
	 *         version = old_version + 1
	 *         unlock
	 *
	 * Without the lock, these two assignments could be reordered before
	 * the version bump and race with syncfs_cb(); keeping both of them
	 * under the spinlock fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that went offline normally; it is okay to drop
		 * it.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * that may race with remote writes, leaving an inaccurate
		 * @sb_dirty_count: dirty data produced during the gap
		 * would not be synced by the next syncfs operation. To
		 * avoid this, we have to invoke sync_inodes_sb() again
		 * after reading @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background makes sure @sbi->hsi.wait_count
	 * eventually drops to zero, whether the remote syncfs succeeds
	 * or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon the syncfs processes in pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: Return syncfs err back to syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}
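
/*
 * Worked example: path_hash("ab", 2, true) evaluates to
 *
 *	res = (0 << 5) - 0 + 'a'	// 97
 *	res = (97 << 5) - 97 + 'b'	// 3105
 *
 * With case_sense == false, "AB" hashes to the same value because each
 * character is folded with tolower() first. Note the loop stops at the
 * NUL terminator; @len does not bound it.
 */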

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
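	/*
	 * Fill with 0xff first so that any command without an explicit
	 * setting below keeps an all-ones (i.e. no valid timeout) value.
	 */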
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->s_cloud_disk_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	sbi->s_readpages_nr = HMDFS_READPAGES_NR_DEF;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}
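
/*
 * Illustrative effect of hmdfs_update_dst(), with a hypothetical mount
 * destination and assuming UPDATE_LOCAL_DST expands to
 * "/device_view/local":
 *
 *	before: local_dst = "/mnt/hmdfs/100/account"
 *	after:  real_dst  = "/mnt/hmdfs/100/account"
 *	        local_dst = "/mnt/hmdfs/100/account/device_view/local"
 */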

/*
 * Generate a boot cookie with the following layout:
 *
 * | random |  boot time (ms) |  0x00 |
 * |--------|-----------------|-------|
 *     16           33           15    (bits)
 *
 * This keeps the boot cookie unique over a period of
 * 2^33 / 1000 / 3600 / 24 ≈ 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}
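
/*
 * Worked example with illustrative values: for now = 0x1234 (ms since
 * boot) and rand = 0xBEEF, the cookie before the final shift is
 *
 *	now &= (1ULL << 33) - 1;	// 0x0000000001234
 *	now |= (uint64_t)0xBEEF << 33;	// 0x17DDE00001234
 *
 * and the shift by HMDFS_FID_VER_BOOT_COOKIE_SHIFT then moves this
 * pattern into the boot-cookie field of the file-id version.
 */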

static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	if (!raw_data)
		return -EINVAL;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	// add ctrl sysfs node
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, sbi, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}
	if (sbi->s_cloud_disk_switch)
		err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_SECOND_LOCAL);
	else
		err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_clear_share_table(sbi);
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->cloud_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* Cancel work items that have not started running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* Wake up async readdir requests that are waiting on remote replies */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* Wait for all async readdir work to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the
	 * vfsmount. Thus shrink_dcache_for_umount() will warn about
	 * dentries still in use if async readdir has not finished.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};
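
/*
 * Illustrative mount invocation (device path, mount point and option
 * values are hypothetical; hmdfs_parse_options() defines the real set,
 * and hmdfs_show_options() above prints it back):
 *
 *	mount -t hmdfs -o merge_enable,user_id=100 \
 *		/data/service/hmdfs/account /mnt/hmdfs/account
 */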

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_destroy_caches;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_unreg_fs;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_exit_configfs;

	hmdfs_message_verify_init();
	return 0;

out_exit_configfs:
	hmdfs_exit_configfs();
out_unreg_fs:
	unregister_filesystem(&hmdfs_fs_type);
out_destroy_caches:
	hmdfs_destroy_caches();
out_err:
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");