1// SPDX-License-Identifier: GPL-2.0 2/* 3 * fs/hmdfs/main.c 4 * 5 * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 6 */ 7 8 9#include "hmdfs.h" 10 11#include <linux/ctype.h> 12#include <linux/module.h> 13#include <linux/statfs.h> 14#include <linux/xattr.h> 15#include <linux/idr.h> 16#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE 17#include <linux/prandom.h> 18#else 19#include <linux/random.h> 20#endif 21 22#include "authority/authentication.h" 23#include "hmdfs_server.h" 24#include "comm/device_node.h" 25#include "comm/message_verify.h" 26#include "comm/protocol.h" 27#include "comm/socket_adapter.h" 28#include "hmdfs_merge_view.h" 29#include "server_writeback.h" 30#include "hmdfs_share.h" 31 32#include "comm/node_cb.h" 33#include "stash.h" 34 35#define CREATE_TRACE_POINTS 36#include "hmdfs_trace.h" 37 38#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33 39 40#define HMDFS_SB_SEQ_FROM 1 41 42struct hmdfs_mount_priv { 43 const char *dev_name; 44 const char *raw_data; 45}; 46 47struct syncfs_item { 48 struct list_head list; 49 struct completion done; 50 bool need_abort; 51}; 52 53static DEFINE_IDA(hmdfs_sb_seq); 54 55static inline int hmdfs_alloc_sb_seq(void) 56{ 57 return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL); 58} 59 60static inline void hmdfs_free_sb_seq(unsigned int seq) 61{ 62 if (!seq) 63 return; 64 ida_simple_remove(&hmdfs_sb_seq, seq); 65} 66 67static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name, 68 void *value, size_t size) 69{ 70 struct path lower_path; 71 ssize_t res = 0; 72 73 hmdfs_get_lower_path(dentry, &lower_path); 74 res = vfs_getxattr(&nop_mnt_idmap, lower_path.dentry, name, value, size); 75 hmdfs_put_lower_path(&lower_path); 76 return res; 77} 78 79static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name, 80 void *value, size_t size) 81{ 82 struct inode *inode = d_inode(dentry); 83 struct hmdfs_inode_info *info = hmdfs_i(inode); 84 struct hmdfs_peer *conn = info->conn; 85 char *send_buf = NULL; 86 ssize_t res = 0; 87 88 send_buf = hmdfs_get_dentry_relative_path(dentry); 89 if (!send_buf) 90 return -ENOMEM; 91 92 res = hmdfs_send_getxattr(conn, send_buf, name, value, size); 93 kfree(send_buf); 94 return res; 95} 96 97static int hmdfs_xattr_merge_get(struct dentry *dentry, const char *name, 98 void *value, size_t size) 99{ 100 int err = 0; 101 struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL); 102 103 if (!lower_dentry) { 104 err = -EOPNOTSUPP; 105 goto out; 106 } 107 err = hmdfs_xattr_local_get(lower_dentry, name, value, size); 108out: 109 dput(lower_dentry); 110 return err; 111} 112 113static int hmdfs_xattr_get(const struct xattr_handler *handler, 114 struct dentry *dentry, struct inode *inode, 115 const char *name, void *value, size_t size) 116{ 117 int res = 0; 118 struct hmdfs_inode_info *info = hmdfs_i(inode); 119 size_t r_size = size; 120 121 if (!hmdfs_support_xattr(dentry)) 122 return -EOPNOTSUPP; 123 124 if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) 125 return -EOPNOTSUPP; 126 127 if (size > HMDFS_XATTR_SIZE_MAX) 128 r_size = HMDFS_XATTR_SIZE_MAX; 129 130 if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) 131 res = hmdfs_xattr_local_get(dentry, name, value, r_size); 132 else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE) 133 res = hmdfs_xattr_remote_get(dentry, name, value, r_size); 134 else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE || 135 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD) 136 res = hmdfs_xattr_merge_get(dentry, name, value, r_size); 137 else 138 res = -EOPNOTSUPP; 139 140 if (res == -ERANGE && r_size != size) { 141 hmdfs_info("no support xattr value size over than: %d", 142 HMDFS_XATTR_SIZE_MAX); 143 res = -E2BIG; 144 } 145 146 return res; 147} 148 149static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name, 150 const void *value, size_t size, int flags) 151{ 152 struct path lower_path; 153 int res = 0; 154 155 hmdfs_get_lower_path(dentry, &lower_path); 156 kuid_t tmp_uid = hmdfs_override_inode_uid(d_inode(lower_path.dentry)); 157 if (value) { 158 res = vfs_setxattr(&nop_mnt_idmap, lower_path.dentry, name, value, size, flags); 159 } else { 160 WARN_ON(flags != XATTR_REPLACE); 161 res = vfs_removexattr(&nop_mnt_idmap, lower_path.dentry, name); 162 } 163 hmdfs_revert_inode_uid(d_inode(lower_path.dentry), tmp_uid); 164 165 hmdfs_put_lower_path(&lower_path); 166 return res; 167} 168 169static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name, 170 const void *value, size_t size, int flags) 171{ 172 struct inode *inode = d_inode(dentry); 173 struct hmdfs_inode_info *info = hmdfs_i(inode); 174 struct hmdfs_peer *conn = info->conn; 175 char *send_buf = NULL; 176 int res = 0; 177 178 send_buf = hmdfs_get_dentry_relative_path(dentry); 179 if (!send_buf) 180 return -ENOMEM; 181 182 res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags); 183 kfree(send_buf); 184 return res; 185} 186 187static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name, 188 const void *value, size_t size, int flags) 189{ 190 int err = 0; 191 struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL); 192 193 if (!lower_dentry) { 194 err = -EOPNOTSUPP; 195 goto out; 196 } 197 err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags); 198out: 199 dput(lower_dentry); 200 return err; 201} 202 203static int hmdfs_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, 204 struct dentry *dentry, struct inode *inode, 205 const char *name, const void *value, 206 size_t size, int flags) 207{ 208 struct hmdfs_inode_info *info = hmdfs_i(inode); 209 210 if (!hmdfs_support_xattr(dentry)) 211 return -EOPNOTSUPP; 212 213 if (size > HMDFS_XATTR_SIZE_MAX) { 214 hmdfs_info("no support too long xattr value: %zu", size); 215 return -E2BIG; 216 } 217 218 if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) 219 return hmdfs_xattr_local_set(dentry, name, value, size, flags); 220 else if (info->inode_type == HMDFS_LAYER_OTHER_REMOTE) 221 return hmdfs_xattr_remote_set(dentry, name, value, size, flags); 222 else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE || 223 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD) 224 return hmdfs_xattr_merge_set(dentry, name, value, size, flags); 225 226 return -EOPNOTSUPP; 227} 228 229const struct xattr_handler hmdfs_xattr_handler = { 230 .prefix = "", /* catch all */ 231 .get = hmdfs_xattr_get, 232 .set = hmdfs_xattr_set, 233}; 234 235static const struct xattr_handler *hmdfs_xattr_handlers[] = { 236 &hmdfs_xattr_handler, 237}; 238 239#define HMDFS_NODE_EVT_CB_DELAY 2 240 241struct kmem_cache *hmdfs_inode_cachep; 242struct kmem_cache *hmdfs_dentry_cachep; 243 244static void i_callback(struct rcu_head *head) 245{ 246 struct inode *inode = container_of(head, struct inode, i_rcu); 247 248 kmem_cache_free(hmdfs_inode_cachep, 249 container_of(inode, struct hmdfs_inode_info, 250 vfs_inode)); 251} 252 253static void hmdfs_destroy_inode(struct inode *inode) 254{ 255 call_rcu(&inode->i_rcu, i_callback); 256} 257 258static void hmdfs_evict_inode(struct inode *inode) 259{ 260 struct hmdfs_inode_info *info = hmdfs_i(inode); 261 262 truncate_inode_pages(&inode->i_data, 0); 263 clear_inode(inode); 264 if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE || 265 info->inode_type == HMDFS_LAYER_SECOND_REMOTE) 266 return; 267 if (info->inode_type == HMDFS_LAYER_ZERO || 268 info->inode_type == HMDFS_LAYER_OTHER_LOCAL || 269 info->inode_type == HMDFS_LAYER_SECOND_LOCAL) { 270 iput(info->lower_inode); 271 info->lower_inode = NULL; 272 } 273} 274 275void hmdfs_put_super(struct super_block *sb) 276{ 277 struct hmdfs_sb_info *sbi = hmdfs_sb(sb); 278 struct super_block *lower_sb = sbi->lower_sb; 279 280 hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst, 281 sbi->local_src); 282 283 hmdfs_cfn_destroy(sbi); 284 hmdfs_unregister_sysfs(sbi); 285 hmdfs_connections_stop(sbi); 286 hmdfs_clear_share_table(sbi); 287 hmdfs_destroy_server_writeback(sbi); 288 hmdfs_exit_stash(sbi); 289 atomic_dec(&lower_sb->s_active); 290 put_cred(sbi->cred); 291 if (sbi->system_cred) 292 put_cred(sbi->system_cred); 293 hmdfs_destroy_writeback(sbi); 294 kfree(sbi->local_src); 295 kfree(sbi->local_dst); 296 kfree(sbi->real_dst); 297 kfree(sbi->cache_dir); 298 kfree(sbi->cloud_dir); 299 kfifo_free(&sbi->notify_fifo); 300 sb->s_fs_info = NULL; 301 sbi->lower_sb = NULL; 302 hmdfs_release_sysfs(sbi); 303 /* After all access are completed */ 304 hmdfs_free_sb_seq(sbi->seq); 305 kfree(sbi->s_server_statis); 306 kfree(sbi->s_client_statis); 307 kfree(sbi); 308} 309 310static struct inode *hmdfs_alloc_inode(struct super_block *sb) 311{ 312 struct hmdfs_inode_info *gi = 313 kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL); 314 if (!gi) 315 return NULL; 316 memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode)); 317 INIT_LIST_HEAD(&gi->wb_list); 318 init_rwsem(&gi->wpage_sem); 319 gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; 320 atomic64_set(&gi->write_counter, 0); 321 gi->fid.id = HMDFS_INODE_INVALID_FILE_ID; 322 spin_lock_init(&gi->fid_lock); 323 INIT_LIST_HEAD(&gi->wr_opened_node); 324 atomic_set(&gi->wr_opened_cnt, 0); 325 init_waitqueue_head(&gi->fid_wq); 326 INIT_LIST_HEAD(&gi->stash_node); 327 spin_lock_init(&gi->stash_lock); 328 return &gi->vfs_inode; 329} 330 331static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf) 332{ 333 int error = 0; 334 int ret = 0; 335 char *dir_path = NULL; 336 char *name_path = NULL; 337 struct hmdfs_peer *con = NULL; 338 struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb); 339 340 dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); 341 if (!dir_path) { 342 error = -EACCES; 343 goto rmdir_out; 344 } 345 346 name_path = hmdfs_connect_path(dir_path, dentry->d_name.name); 347 if (!name_path) { 348 error = -EACCES; 349 goto rmdir_out; 350 } 351 mutex_lock(&sbi->connections.node_lock); 352 list_for_each_entry(con, &sbi->connections.node_list, list) { 353 if (con->status == NODE_STAT_ONLINE) { 354 peer_get(con); 355 mutex_unlock(&sbi->connections.node_lock); 356 hmdfs_debug("send MSG to remote devID %llu", 357 con->device_id); 358 ret = hmdfs_send_statfs(con, name_path, buf); 359 if (ret != 0) 360 error = ret; 361 peer_put(con); 362 mutex_lock(&sbi->connections.node_lock); 363 } 364 } 365 mutex_unlock(&sbi->connections.node_lock); 366 367rmdir_out: 368 kfree(dir_path); 369 kfree(name_path); 370 return error; 371} 372 373static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf) 374{ 375 int err = 0; 376 struct path lower_path; 377 struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); 378 struct super_block *sb = d_inode(dentry)->i_sb; 379 struct hmdfs_sb_info *sbi = sb->s_fs_info; 380 381 trace_hmdfs_statfs(dentry, info->inode_type); 382 // merge_view & merge_view/xxx & device_view assigned src_inode info 383 if (hmdfs_i_merge(info) || 384 (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) { 385 err = kern_path(sbi->local_src, 0, &lower_path); 386 if (err) 387 goto out; 388 err = vfs_statfs(&lower_path, buf); 389 path_put(&lower_path); 390 } else if (!IS_ERR_OR_NULL(info->lower_inode)) { 391 hmdfs_get_lower_path(dentry, &lower_path); 392 err = vfs_statfs(&lower_path, buf); 393 hmdfs_put_lower_path(&lower_path); 394 } else { 395 err = hmdfs_remote_statfs(dentry, buf); 396 } 397 398 buf->f_type = HMDFS_SUPER_MAGIC; 399out: 400 return err; 401} 402 403static int hmdfs_show_options(struct seq_file *m, struct dentry *root) 404{ 405 struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb); 406 407 if (sbi->s_case_sensitive) 408 seq_puts(m, ",sensitive"); 409 else 410 seq_puts(m, ",insensitive"); 411 412 if (sbi->s_merge_switch) 413 seq_puts(m, ",merge_enable"); 414 else 415 seq_puts(m, ",merge_disable"); 416 417 seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages); 418 seq_printf(m, ",user_id=%u", sbi->user_id); 419 420 if (sbi->cache_dir) 421 seq_printf(m, ",cache_dir=%s", sbi->cache_dir); 422 if (sbi->real_dst) 423 seq_printf(m, ",real_dst=%s", sbi->real_dst); 424 if (sbi->cloud_dir) 425 seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir); 426 427 seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_"); 428 seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_"); 429 430 return 0; 431} 432 433static int hmdfs_sync_fs(struct super_block *sb, int wait) 434{ 435 int time_left; 436 int err = 0; 437 struct hmdfs_peer *con = NULL; 438 struct hmdfs_sb_info *sbi = hmdfs_sb(sb); 439 int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS); 440 struct syncfs_item item, *entry = NULL, *tmp = NULL; 441 442 if (!wait) 443 return 0; 444 445 trace_hmdfs_syncfs_enter(sbi); 446 447 spin_lock(&sbi->hsi.list_lock); 448 if (!sbi->hsi.is_executing) { 449 sbi->hsi.is_executing = true; 450 item.need_abort = false; 451 spin_unlock(&sbi->hsi.list_lock); 452 } else { 453 init_completion(&item.done); 454 list_add_tail(&item.list, &sbi->hsi.wait_list); 455 spin_unlock(&sbi->hsi.list_lock); 456 wait_for_completion(&item.done); 457 } 458 459 if (item.need_abort) 460 goto out; 461 462 /* 463 * Syncfs can not concurrent in hmdfs_sync_fs. Because we should make 464 * sure all remote syncfs calls return back or timeout by waiting, 465 * during the waiting period we must protect @sbi->remote_syncfs_count 466 * and @sbi->remote_syncfs_ret from concurrent executing. 467 */ 468 469 spin_lock(&sbi->hsi.v_lock); 470 sbi->hsi.version++; 471 /* 472 * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count 473 * into spinlock protection area to avoid following scenario caused 474 * by out-of-order execution: 475 * 476 * synfs syncfs_cb 477 * sbi->hsi.remote_ret = 0; 478 * atomic_set(&sbi->hsi.wait_count, 0); 479 * lock 480 * version == old_version 481 * sbi->hsi.remote_ret = resp->ret_code 482 * atomic_dec(&sbi->hsi.wait_count); 483 * unlock 484 * lock 485 * version = old_version + 1 486 * unlock 487 * 488 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count can be assigned 489 * before spin lock which may compete with syncfs_cb(), making 490 * these two values' assignment protected by spinlock can fix this. 491 */ 492 sbi->hsi.remote_ret = 0; 493 atomic_set(&sbi->hsi.wait_count, 0); 494 spin_unlock(&sbi->hsi.v_lock); 495 496 mutex_lock(&sbi->connections.node_lock); 497 list_for_each_entry(con, &sbi->connections.node_list, list) { 498 /* 499 * Dirty data does not need to be synchronized to remote 500 * devices that go offline normally. It's okay to drop 501 * them. 502 */ 503 if (con->status != NODE_STAT_ONLINE) 504 continue; 505 506 peer_get(con); 507 mutex_unlock(&sbi->connections.node_lock); 508 509 /* 510 * There exists a gap between sync_inodes_sb() and sync_fs() 511 * which may race with remote writing, leading error count 512 * on @sb_dirty_count. The dirty data produced during the 513 * gap period won't be synced in next syncfs operation. 514 * To avoid this, we have to invoke sync_inodes_sb() again 515 * after getting @con->sb_dirty_count. 516 */ 517 con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count); 518 sync_inodes_sb(sb); 519 520 if (!con->old_sb_dirty_count) { 521 peer_put(con); 522 mutex_lock(&sbi->connections.node_lock); 523 continue; 524 } 525 526 err = hmdfs_send_syncfs(con, syncfs_timeout); 527 if (err) { 528 hmdfs_warning("send syncfs failed with %d on node %llu", 529 err, con->device_id); 530 sbi->hsi.remote_ret = err; 531 peer_put(con); 532 mutex_lock(&sbi->connections.node_lock); 533 continue; 534 } 535 536 atomic_inc(&sbi->hsi.wait_count); 537 538 peer_put(con); 539 mutex_lock(&sbi->connections.node_lock); 540 } 541 mutex_unlock(&sbi->connections.node_lock); 542 543 /* 544 * Async work in background will make sure @sbi->remote_syncfs_count 545 * decreased to zero finally whether syncfs success or fail. 546 */ 547 time_left = wait_event_interruptible( 548 sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0); 549 if (time_left < 0) { 550 hmdfs_warning("syncfs is interrupted by external signal"); 551 err = -EINTR; 552 } 553 554 if (!err && sbi->hsi.remote_ret) 555 err = sbi->hsi.remote_ret; 556 557 /* Abandon syncfs processes in pending_list */ 558 list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) { 559 entry->need_abort = true; 560 complete(&entry->done); 561 } 562 INIT_LIST_HEAD(&sbi->hsi.pending_list); 563 564 /* Pick the last syncfs process in wait_list */ 565 spin_lock(&sbi->hsi.list_lock); 566 if (list_empty(&sbi->hsi.wait_list)) { 567 sbi->hsi.is_executing = false; 568 } else { 569 entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item, 570 list); 571 list_del_init(&entry->list); 572 list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list); 573 entry->need_abort = false; 574 complete(&entry->done); 575 } 576 spin_unlock(&sbi->hsi.list_lock); 577 578out: 579 trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count), 580 get_cmd_timeout(sbi, F_SYNCFS), err); 581 582 /* TODO: Return synfs err back to syscall */ 583 584 return err; 585} 586 587struct super_operations hmdfs_sops = { 588 .alloc_inode = hmdfs_alloc_inode, 589 .destroy_inode = hmdfs_destroy_inode, 590 .evict_inode = hmdfs_evict_inode, 591 .put_super = hmdfs_put_super, 592 .statfs = hmdfs_statfs, 593 .show_options = hmdfs_show_options, 594 .sync_fs = hmdfs_sync_fs, 595}; 596 597static void init_once(void *obj) 598{ 599 struct hmdfs_inode_info *i = obj; 600 601 inode_init_once(&i->vfs_inode); 602} 603 604static int __init hmdfs_init_caches(void) 605{ 606 int err = -ENOMEM; 607 608 hmdfs_inode_cachep = 609 kmem_cache_create("hmdfs_inode_cache", 610 sizeof(struct hmdfs_inode_info), 0, 611 SLAB_RECLAIM_ACCOUNT, init_once); 612 if (unlikely(!hmdfs_inode_cachep)) 613 goto out; 614 hmdfs_dentry_cachep = 615 kmem_cache_create("hmdfs_dentry_cache", 616 sizeof(struct hmdfs_dentry_info), 0, 617 SLAB_RECLAIM_ACCOUNT, NULL); 618 if (unlikely(!hmdfs_dentry_cachep)) 619 goto out_des_ino; 620 hmdfs_dentry_merge_cachep = 621 kmem_cache_create("hmdfs_dentry_merge_cache", 622 sizeof(struct hmdfs_dentry_info_merge), 0, 623 SLAB_RECLAIM_ACCOUNT, NULL); 624 if (unlikely(!hmdfs_dentry_merge_cachep)) 625 goto out_des_dc; 626 return 0; 627 628out_des_dc: 629 kmem_cache_destroy(hmdfs_dentry_cachep); 630out_des_ino: 631 kmem_cache_destroy(hmdfs_inode_cachep); 632out: 633 return err; 634} 635 636static void hmdfs_destroy_caches(void) 637{ 638 rcu_barrier(); 639 kmem_cache_destroy(hmdfs_inode_cachep); 640 hmdfs_inode_cachep = NULL; 641 kmem_cache_destroy(hmdfs_dentry_cachep); 642 hmdfs_dentry_cachep = NULL; 643 kmem_cache_destroy(hmdfs_dentry_merge_cachep); 644 hmdfs_dentry_merge_cachep = NULL; 645} 646 647uint64_t path_hash(const char *path, int len, bool case_sense) 648{ 649 uint64_t res = 0; 650 const char *kp = path; 651 char c; 652 /* Mocklisp hash function. */ 653 while (*kp) { 654 c = *kp; 655 if (!case_sense) 656 c = tolower(c); 657 res = (res << 5) - res + (uint64_t)(c); 658 kp++; 659 } 660 return res; 661} 662 663static char *get_full_path(struct path *path) 664{ 665 char *buf, *tmp; 666 char *ret = NULL; 667 668 buf = kmalloc(PATH_MAX, GFP_KERNEL); 669 if (!buf) 670 goto out; 671 672 tmp = d_path(path, buf, PATH_MAX); 673 if (IS_ERR(tmp)) 674 goto out; 675 676 ret = kstrdup(tmp, GFP_KERNEL); 677out: 678 kfree(buf); 679 return ret; 680} 681 682static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi) 683{ 684 memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout)); 685 686 set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON); 687 set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE); 688 set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON); 689 set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON); 690 set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S); 691 set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON); 692 set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON); 693 set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON); 694 set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON); 695 set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON); 696 set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON); 697 set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON); 698 set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE); 699 set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE); 700 set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON); 701 set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S); 702 set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S); 703 set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON); 704 set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON); 705 set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON); 706} 707 708static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) 709{ 710 int ret; 711 712 ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL); 713 if (ret) 714 goto out; 715 716 /* 717 * We have to use dynamic memory since struct server/client_statistic 718 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h. 719 */ 720 sbi->s_server_statis = 721 kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL); 722 sbi->s_client_statis = 723 kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL); 724 if (!sbi->s_server_statis || !sbi->s_client_statis) { 725 ret = -ENOMEM; 726 goto out; 727 } 728 729 ret = hmdfs_alloc_sb_seq(); 730 if (ret < 0) { 731 hmdfs_err("no sb seq available err %d", ret); 732 goto out; 733 } 734 sbi->seq = ret; 735 ret = 0; 736 737 spin_lock_init(&sbi->notify_fifo_lock); 738 mutex_init(&sbi->cmd_handler_mutex); 739 sbi->s_case_sensitive = false; 740 sbi->s_features = HMDFS_FEATURE_READPAGES | 741 HMDFS_FEATURE_READPAGES_OPEN | 742 HMDFS_ATOMIC_OPEN; 743 sbi->s_merge_switch = false; 744 sbi->s_cloud_disk_switch = false; 745 sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD; 746 sbi->dcache_precision = DEFAULT_DCACHE_PRECISION; 747 sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT; 748 sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT; 749 hmdfs_init_cmd_timeout(sbi); 750 sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY; 751 sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE; 752 sbi->s_offline_stash = true; 753 sbi->s_dentry_cache = true; 754 sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS; 755 sbi->s_readpages_nr = HMDFS_READPAGES_NR_DEF; 756 /* Initialize before hmdfs_register_sysfs() */ 757 atomic_set(&sbi->connections.conn_seq, 0); 758 mutex_init(&sbi->connections.node_lock); 759 INIT_LIST_HEAD(&sbi->connections.node_list); 760 761 ret = hmdfs_init_share_table(sbi); 762 if (ret) 763 goto out; 764 init_waitqueue_head(&sbi->async_readdir_wq); 765 INIT_LIST_HEAD(&sbi->async_readdir_msg_list); 766 INIT_LIST_HEAD(&sbi->async_readdir_work_list); 767 spin_lock_init(&sbi->async_readdir_msg_lock); 768 spin_lock_init(&sbi->async_readdir_work_lock); 769 770 return 0; 771 772out: 773 return ret; 774} 775 776void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, 777 enum hmdfs_resp_type type, unsigned long start, 778 unsigned long end) 779{ 780 unsigned long duration; 781 782 switch (type) { 783 case HMDFS_RESP_DELAY: 784 sbi->s_client_statis[cmd].delay_resp_cnt++; 785 break; 786 case HMDFS_RESP_TIMEOUT: 787 sbi->s_client_statis[cmd].timeout_cnt++; 788 break; 789 case HMDFS_RESP_NORMAL: 790 duration = end - start; 791 sbi->s_client_statis[cmd].total += duration; 792 sbi->s_client_statis[cmd].resp_cnt++; 793 if (sbi->s_client_statis[cmd].max < duration) 794 sbi->s_client_statis[cmd].max = duration; 795 break; 796 default: 797 hmdfs_err("Wrong cmd %d with resp type %d", cmd, type); 798 } 799} 800 801static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) 802{ 803 int err = 0; 804 const char *path_local = UPDATE_LOCAL_DST; 805 int len = 0; 806 807 sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL); 808 if (!sbi->real_dst) { 809 err = -ENOMEM; 810 goto out_err; 811 } 812 kfree(sbi->local_dst); 813 sbi->local_dst = NULL; 814 815 len = strlen(sbi->real_dst) + strlen(path_local) + 1; 816 if (len > PATH_MAX) { 817 err = -EINVAL; 818 goto out_err; 819 } 820 sbi->local_dst = kmalloc(len, GFP_KERNEL); 821 if (!sbi->local_dst) { 822 err = -ENOMEM; 823 goto out_err; 824 } 825 snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1, 826 "%s%s", sbi->real_dst, path_local); 827out_err: 828 return err; 829} 830 831/* 832 * Generate boot cookie like following format: 833 * 834 * | random | boot time(ms) | 0x00 | 835 * |--------|-----------------|-------| 836 * 16 33 15 (bits) 837 * 838 * This will make sure boot cookie is unique in a period 839 * 2^33 / 1000 / 3600 / 24 = 99.4(days). 840 */ 841uint64_t hmdfs_gen_boot_cookie(void) 842{ 843 uint64_t now; 844 uint16_t rand; 845 struct rnd_state rnd_state; 846 847 now = ktime_to_ms(ktime_get()); 848 prandom_bytes_state(&rnd_state, (void *)&rand, 2); 849 850 now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1; 851 now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT); 852 853 return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT; 854} 855 856static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) 857{ 858 struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data; 859 const char *dev_name = priv->dev_name; 860 const char *raw_data = priv->raw_data; 861 struct hmdfs_sb_info *sbi; 862 int err = 0; 863 struct inode *root_inode; 864 struct path lower_path; 865 struct super_block *lower_sb; 866 struct dentry *root_dentry; 867 char ctrl_path[CTRL_PATH_MAX_LEN]; 868 uint64_t ctrl_hash; 869 870 if (!raw_data) 871 return -EINVAL; 872 873 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 874 if (!sbi) { 875 err = -ENOMEM; 876 goto out_err; 877 } 878 err = hmdfs_init_sbi(sbi); 879 if (err) 880 goto out_freesbi; 881 sbi->sb = sb; 882 err = hmdfs_parse_options(sbi, raw_data); 883 if (err) 884 goto out_freesbi; 885 886 sb->s_fs_info = sbi; 887 sb->s_magic = HMDFS_SUPER_MAGIC; 888 sb->s_xattr = hmdfs_xattr_handlers; 889 sb->s_op = &hmdfs_sops; 890 891 sbi->boot_cookie = hmdfs_gen_boot_cookie(); 892 893 err = hmdfs_init_writeback(sbi); 894 if (err) 895 goto out_freesbi; 896 err = hmdfs_init_server_writeback(sbi); 897 if (err) 898 goto out_freesbi; 899 900 err = hmdfs_init_stash(sbi); 901 if (err) 902 goto out_freesbi; 903 904 // add ctrl sysfs node 905 ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); 906 scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); 907 hmdfs_debug("hash %llu", ctrl_hash); 908 err = hmdfs_register_sysfs(ctrl_path, sbi); 909 if (err) 910 goto out_freesbi; 911 912 err = hmdfs_update_dst(sbi); 913 if (err) 914 goto out_unreg_sysfs; 915 916 err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 917 &lower_path); 918 if (err) { 919 hmdfs_err("open dev failed, errno = %d", err); 920 goto out_unreg_sysfs; 921 } 922 923 lower_sb = lower_path.dentry->d_sb; 924 atomic_inc(&lower_sb->s_active); 925 sbi->lower_sb = lower_sb; 926 sbi->local_src = get_full_path(&lower_path); 927 if (!sbi->local_src) { 928 hmdfs_err("get local_src failed!"); 929 goto out_sput; 930 } 931 932 sb->s_time_gran = lower_sb->s_time_gran; 933 sb->s_maxbytes = lower_sb->s_maxbytes; 934 sb->s_stack_depth = lower_sb->s_stack_depth + 1; 935 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 936 hmdfs_err("maximum fs stacking depth exceeded"); 937 err = -EINVAL; 938 goto out_sput; 939 } 940 root_inode = fill_root_inode(sb, sbi, d_inode(lower_path.dentry)); 941 if (IS_ERR(root_inode)) { 942 err = PTR_ERR(root_inode); 943 goto out_sput; 944 } 945 hmdfs_root_inode_perm_init(root_inode); 946 sb->s_root = root_dentry = d_make_root(root_inode); 947 if (!root_dentry) { 948 err = -ENOMEM; 949 goto out_sput; 950 } 951 if (sbi->s_cloud_disk_switch) 952 err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_SECOND_LOCAL); 953 else 954 err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); 955 if (err) 956 goto out_freeroot; 957 hmdfs_set_lower_path(root_dentry, &lower_path); 958 sbi->cred = get_cred(current_cred()); 959 INIT_LIST_HEAD(&sbi->client_cache); 960 INIT_LIST_HEAD(&sbi->server_cache); 961 INIT_LIST_HEAD(&sbi->to_delete); 962 mutex_init(&sbi->cache_list_lock); 963 hmdfs_cfn_load(sbi); 964 965 /* Initialize syncfs info */ 966 spin_lock_init(&sbi->hsi.v_lock); 967 init_waitqueue_head(&sbi->hsi.wq); 968 sbi->hsi.version = 0; 969 sbi->hsi.is_executing = false; 970 INIT_LIST_HEAD(&sbi->hsi.wait_list); 971 INIT_LIST_HEAD(&sbi->hsi.pending_list); 972 spin_lock_init(&sbi->hsi.list_lock); 973 974 return err; 975out_freeroot: 976 dput(sb->s_root); 977 sb->s_root = NULL; 978out_sput: 979 atomic_dec(&lower_sb->s_active); 980 path_put(&lower_path); 981out_unreg_sysfs: 982 hmdfs_unregister_sysfs(sbi); 983 hmdfs_release_sysfs(sbi); 984out_freesbi: 985 if (sbi) { 986 sb->s_fs_info = NULL; 987 hmdfs_clear_share_table(sbi); 988 hmdfs_exit_stash(sbi); 989 hmdfs_destroy_writeback(sbi); 990 hmdfs_destroy_server_writeback(sbi); 991 kfifo_free(&sbi->notify_fifo); 992 hmdfs_free_sb_seq(sbi->seq); 993 kfree(sbi->local_src); 994 kfree(sbi->local_dst); 995 kfree(sbi->real_dst); 996 kfree(sbi->cache_dir); 997 kfree(sbi->cloud_dir); 998 kfree(sbi->s_server_statis); 999 kfree(sbi->s_client_statis); 1000 kfree(sbi); 1001 } 1002out_err: 1003 return err; 1004} 1005 1006static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, 1007 const char *dev_name, void *raw_data) 1008{ 1009 struct hmdfs_mount_priv priv = { 1010 .dev_name = dev_name, 1011 .raw_data = raw_data, 1012 }; 1013 1014 /* hmdfs needs a valid dev_name to get the lower_sb's metadata */ 1015 if (!dev_name || !*dev_name) 1016 return ERR_PTR(-EINVAL); 1017 return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); 1018} 1019 1020 1021static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi) 1022{ 1023 struct sendmsg_wait_queue *msg_wq = NULL; 1024 struct hmdfs_readdir_work *rw = NULL; 1025 struct hmdfs_readdir_work *tmp = NULL; 1026 struct list_head del_work; 1027 1028 /* cancel work that are not running */ 1029 1030 INIT_LIST_HEAD(&del_work); 1031 spin_lock(&sbi->async_readdir_work_lock); 1032 list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) { 1033 if (cancel_delayed_work(&rw->dwork)) 1034 list_move(&rw->head, &del_work); 1035 } 1036 spin_unlock(&sbi->async_readdir_work_lock); 1037 1038 list_for_each_entry_safe(rw, tmp, &del_work, head) { 1039 dput(rw->dentry); 1040 peer_put(rw->con); 1041 kfree(rw); 1042 } 1043 1044 /* wake up async readdir that are waiting for remote */ 1045 spin_lock(&sbi->async_readdir_msg_lock); 1046 sbi->async_readdir_prohibit = true; 1047 list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg) 1048 hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL); 1049 spin_unlock(&sbi->async_readdir_msg_lock); 1050 1051 /* wait for all async readdir to finish */ 1052 if (!list_empty(&sbi->async_readdir_work_list)) 1053 wait_event_interruptible_timeout(sbi->async_readdir_wq, 1054 (list_empty(&sbi->async_readdir_work_list)), HZ); 1055 1056 WARN_ON(!(list_empty(&sbi->async_readdir_work_list))); 1057} 1058 1059static void hmdfs_kill_super(struct super_block *sb) 1060{ 1061 struct hmdfs_sb_info *sbi = hmdfs_sb(sb); 1062 1063 /* 1064 * async readdir is holding ref for dentry, not for vfsmount. Thus 1065 * shrink_dcache_for_umount() will warn about dentry still in use 1066 * if async readdir is not done. 1067 */ 1068 if (sbi) 1069 hmdfs_cancel_async_readdir(sbi); 1070 kill_anon_super(sb); 1071} 1072 1073static struct file_system_type hmdfs_fs_type = { 1074 .owner = THIS_MODULE, 1075 .name = "hmdfs", 1076 .mount = hmdfs_mount, 1077 .kill_sb = hmdfs_kill_super, 1078}; 1079 1080static int __init hmdfs_init(void) 1081{ 1082 int err = 0; 1083 1084 err = hmdfs_init_caches(); 1085 if (err) 1086 goto out_err; 1087 1088 hmdfs_node_evt_cb_init(); 1089 1090 hmdfs_stash_add_node_evt_cb(); 1091 hmdfs_client_add_node_evt_cb(); 1092 hmdfs_server_add_node_evt_cb(); 1093 1094 err = register_filesystem(&hmdfs_fs_type); 1095 if (err) { 1096 hmdfs_err("hmdfs register failed!"); 1097 goto out_err; 1098 } 1099 1100 err = hmdfs_init_configfs(); 1101 if (err) 1102 goto out_err; 1103 1104 err = hmdfs_sysfs_init(); 1105 if (err) 1106 goto out_err; 1107 1108 hmdfs_message_verify_init(); 1109 return 0; 1110out_err: 1111 hmdfs_sysfs_exit(); 1112 hmdfs_exit_configfs(); 1113 unregister_filesystem(&hmdfs_fs_type); 1114 hmdfs_destroy_caches(); 1115 hmdfs_err("hmdfs init failed!"); 1116 return err; 1117} 1118 1119static void __exit hmdfs_exit(void) 1120{ 1121 hmdfs_sysfs_exit(); 1122 hmdfs_exit_configfs(); 1123 unregister_filesystem(&hmdfs_fs_type); 1124 ida_destroy(&hmdfs_sb_seq); 1125 hmdfs_destroy_caches(); 1126 hmdfs_info("hmdfs exited!"); 1127} 1128 1129module_init(hmdfs_init); 1130module_exit(hmdfs_exit); 1131 1132EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback); 1133 1134MODULE_LICENSE("GPL v2"); 1135MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao"); 1136MODULE_DESCRIPTION("Harmony distributed file system"); 1137