// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

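/*
 * For a merge-view inode, getxattr is served from the lower dentry on the
 * local device (HMDFS_DEVID_LOCAL) only; other devices are not consulted.
 */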
static int hmdfs_xattr_merge_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_get(lower_dentry, name, value, size);
out:
	dput(lower_dentry);
	return err;
}

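/*
 * Dispatch getxattr by inode layer (local, remote or merge view). Only
 * "user." attributes are supported, and the value size forwarded to the
 * backend is clamped to HMDFS_XATTR_SIZE_MAX.
 */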
static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		res = hmdfs_xattr_merge_get(dentry, name, value, r_size);
	else
		res = -EOPNOTSUPP;

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size over %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	kuid_t tmp_uid;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	tmp_uid = hmdfs_override_inode_uid(d_inode(lower_path.dentry));
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}
	hmdfs_revert_inode_uid(d_inode(lower_path.dentry), tmp_uid);

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

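/*
 * As in hmdfs_xattr_merge_get(), only the lower dentry on the local
 * device is modified.
 */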
static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE)
		return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return -EOPNOTSUPP;
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL,	/* the VFS iterates s_xattr until a NULL sentinel */
};

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

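/*
 * On eviction, drop the lower inode reference held by locally backed
 * layers (HMDFS_LAYER_ZERO, OTHER_LOCAL and SECOND_LOCAL); the other
 * layers hold no lower inode.
 */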
static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfree(sbi->cloud_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* After all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

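/*
 * With no local lower inode to query, ask every online peer to statfs
 * the dentry's relative path; the last error reported, if any, is
 * returned to the caller.
 */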
static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto rmdir_out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto rmdir_out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

rmdir_out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	/* merge_view, merge_view/xxx and device_view report src_inode info */
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);
	if (sbi->cloud_dir)
		seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

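/*
 * Only one syncfs instance runs at a time. Concurrent callers queue a
 * syncfs_item on hsi.wait_list; when the running instance finishes it
 * aborts every entry on hsi.pending_list, wakes the newest waiter and
 * hands the remaining waiters over as that waiter's pending list.
 */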
static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs cannot run concurrently in hmdfs_sync_fs(): we have to wait
	 * until every remote syncfs call has returned or timed out, and
	 * during that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret
	 * must be protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we reset @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * inside the spinlock-protected area to avoid the following scenario
	 * caused by out-of-order execution:
	 *
	 *	syncfs				syncfs_cb
	 *	sbi->hsi.remote_ret = 0;
	 *	atomic_set(&sbi->hsi.wait_count, 0);
	 *					lock
	 *					version == old_version
	 *					sbi->hsi.remote_ret = resp->ret_code
	 *					atomic_dec(&sbi->hsi.wait_count);
	 *					unlock
	 *	lock
	 *	version = old_version + 1
	 *	unlock
	 *
	 * Without the lock, the assignments to @sbi->hsi.remote_ret and
	 * @sbi->hsi.wait_count could be reordered ahead of the version bump
	 * and race with syncfs_cb(); keeping them inside the spinlock fixes
	 * this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that go offline normally. It's okay to drop
		 * them.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There exists a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writing, leading to an incorrect
		 * @sb_dirty_count. The dirty data produced during the gap
		 * would then not be synced by the next syncfs operation.
		 * To avoid this, we have to invoke sync_inodes_sb() again
		 * after getting @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * The async work running in the background guarantees that
	 * @sbi->hsi.wait_count eventually drops to zero, whether the remote
	 * syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abort the syncfs processes queued on pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: return syncfs errors back to the syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

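/*
 * Note: @len is unused; the string is hashed up to its terminating NUL.
 * With case_sense == false, characters are lower-cased before hashing.
 */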
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

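/*
 * Allocate per-superblock resources (notify fifo, statistics arrays,
 * sb sequence number, share table) and fill in default option values
 * before hmdfs_parse_options() and hmdfs_register_sysfs() are called.
 */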
static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->s_cloud_disk_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	sbi->s_readpages_nr = HMDFS_READPAGES_NR_DEF;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

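/*
 * Record a client-side response for @cmd: count delayed and timed-out
 * responses, and accumulate total/max latency for normal ones.
 */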
void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

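/*
 * Keep the user-supplied destination in @sbi->real_dst and rebuild
 * @sbi->local_dst as real_dst with the UPDATE_LOCAL_DST suffix appended.
 */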
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random | boot time (ms) | 0x00 |
 * |--------|----------------|------|
 *     16           33          15    (bits)
 *
 * This makes sure the boot cookie is unique within a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

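/*
 * Build the hmdfs superblock: set up sbi defaults, parse mount options,
 * register the per-mount sysfs node (named after a hash of local_dst),
 * pin the lower superblock found via @dev_name and create the root
 * dentry/inode on top of it.
 */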
static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	if (!raw_data)
		return -EINVAL;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	// add ctrl sysfs node
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, sbi, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}
	if (sbi->s_cloud_disk_switch)
		err = init_hmdfs_dentry_info(sbi, root_dentry,
					     HMDFS_LAYER_SECOND_LOCAL);
	else
		err = init_hmdfs_dentry_info(sbi, root_dentry,
					     HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_clear_share_table(sbi);
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->cloud_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* cancel work that is not running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* wake up async readdir that is waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* wait for all async readdir to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the vfsmount.
	 * Thus shrink_dcache_for_umount() will warn about dentries still in
	 * use if async readdir has not finished yet.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;

out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");