// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/file_remote.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include <linux/backing-dev.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include "file_remote.h"

#include "comm/socket_adapter.h"
#include "hmdfs.h"
#include "hmdfs_client.h"
#include "hmdfs_dentryfile.h"
#include "hmdfs_trace.h"

static inline bool hmdfs_remote_write_cache_expired(
	struct hmdfs_inode_info *info)
{
	return time_after(jiffies, info->writecache_expire);
}

enum expire_reason {
	ALL_GOOD = 0,
	INO_DISMATCH = 1,
	SIZE_OR_CTIME_DISMATCH = 2,
	TIMER_EXPIRE = 3,
	TIMER_WORKING = 4,
	STABLE_CTIME_DISMATCH = 5,
	KEEP_CACHE = 6,
};
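
/*
 * How hmdfs_open_final_remote() below maps its checks onto expire_reason
 * (a summary derived from the function body, not additional logic):
 *
 *   INO_DISMATCH           remote ino changed          -> -ESTALE, reopen
 *   KEEP_CACHE             caller asked to keep cache  -> keep pages & i_size
 *   SIZE_OR_CTIME_DISMATCH size or ctime mismatch      -> truncate pagecache
 *   TIMER_EXPIRE           write-cache timer expired   -> truncate pagecache
 *   TIMER_WORKING          write-cache timer running   -> keep pagecache
 *   STABLE_CTIME_DISMATCH  stable_ctime zero or moved  -> truncate pagecache
 */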

/*
 * hmdfs_open_final_remote - Do final steps of opening a remote file, update
 * local inode cache and decide whether or not to truncate inode pages.
 *
 * @info: hmdfs inode info
 * @open_ret: values returned from remote when opening a remote file
 * @keep_cache: keep local cache & i_size
 */
static int hmdfs_open_final_remote(struct hmdfs_inode_info *info,
				   struct hmdfs_open_ret *open_ret,
				   struct file *file, bool keep_cache)
{
	struct inode *inode = &info->vfs_inode;
	bool truncate = false;
	enum expire_reason reason = ALL_GOOD;
	int ret = 0;

	/*
	 * If the remote inode number changed, we looked up stale data:
	 * return -ESTALE and reopen the file with metadata from remote
	 * getattr.
	 */
	if (info->remote_ino != open_ret->ino) {
		hmdfs_debug(
			"got stale local inode, ino in local %llu, ino from open %llu",
			info->remote_ino, open_ret->ino);
		hmdfs_send_close(info->conn, &open_ret->fid);
		reason = INO_DISMATCH;
		ret = -ESTALE;
		goto out;
	}

	if (keep_cache) {
		reason = KEEP_CACHE;
		trace_hmdfs_open_final_remote(info, open_ret, file, reason);
		goto set_fid_out;
	}

	/*
	 * Truncate if the remote size does not match the local inode, or
	 * the remote ctime does not match the one recorded the last time
	 * this file was opened.
	 */
	if (inode->i_size != open_ret->file_size ||
	    hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) {
		truncate = true;
		reason = SIZE_OR_CTIME_DISMATCH;
		goto out;
	}

	/*
	 * If 'writecache_expire' is set, check whether it has expired, and
	 * skip the checking of stable_ctime.
	 */
	if (info->writecache_expire) {
		truncate = hmdfs_remote_write_cache_expired(info);
		if (truncate)
			reason = TIMER_EXPIRE;
		else
			reason = TIMER_WORKING;
		goto out;
	}

	/* the first open, or the remote ctime is ahead of the remote time */
	if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) {
		truncate = true;
		reason = STABLE_CTIME_DISMATCH;
		goto out;
	}

	/*
	 * - if last stable_ctime == stable_ctime, we do nothing.
	 *   a. if ctime < stable_ctime, data is ensured to be uptodate,
	 *   b. if ctime == stable_ctime, stale data might be accessed. This is
	 *      acceptable since pagecache will be dropped later.
	 *   c. ctime > stable_ctime is impossible.
	 * - if last stable_ctime < stable_ctime, we clear the cache.
	 *   d. ctime != last stable_ctime is impossible
	 *   e. ctime == last stable_ctime, this is possible to read again from
	 *      b, thus we need to drop the cache.
	 * - if last stable_ctime > stable_ctime, we clear the cache.
	 *   stable_ctime must be zero in this case, which is possible because
	 *   system time might be changed.
	 */
	if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) {
		truncate = true;
		reason = STABLE_CTIME_DISMATCH;
		goto out;
	}

out:
	trace_hmdfs_open_final_remote(info, open_ret, file, reason);
	if (ret)
		return ret;

	if (reason == SIZE_OR_CTIME_DISMATCH) {
		inode->i_ctime = open_ret->remote_ctime;
		info->remote_ctime = open_ret->remote_ctime;
	}

	if (truncate) {
		info->writecache_expire = 0;
		truncate_inode_pages(inode->i_mapping, 0);
	}

	atomic64_set(&info->write_counter, 0);
	info->stable_ctime = open_ret->stable_ctime;
	i_size_write(inode, open_ret->file_size);
	info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
set_fid_out:
	spin_lock(&info->fid_lock);
	info->fid = open_ret->fid;
	spin_unlock(&info->fid_lock);
	return 0;
}

int hmdfs_do_open_remote(struct file *file, bool keep_cache)
{
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
	struct hmdfs_peer *conn = info->conn;
	struct hmdfs_open_ret open_ret;
	__u8 file_type = hmdfs_d(file->f_path.dentry)->file_type;
	char *send_buf;
	int err = 0;

	send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry);
	if (!send_buf) {
		err = -ENOMEM;
		goto out_free;
	}
	err = hmdfs_send_open(conn, send_buf, file_type, &open_ret);
	if (err) {
		hmdfs_err("hmdfs_send_open return failed with %d", err);
		goto out_free;
	}

	err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache);

out_free:
	kfree(send_buf);
	return err;
}

static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info)
{
	return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
}

static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info)
{
	return test_bit(HMDFS_FID_OPENING, &info->fid_flags);
}

static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info)
{
	int err;

	if (!hmdfs_remote_is_opening_file(info))
		return 0;

	err = ___wait_event(info->fid_wq, !hmdfs_remote_is_opening_file(info),
			    TASK_INTERRUPTIBLE, 0, 0,
			    spin_unlock(&info->fid_lock);
			    schedule();
			    spin_lock(&info->fid_lock));
	if (err)
		err = -EINTR;

	return err;
}
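
/*
 * A note on the locking protocol above, derived from the caller below:
 * hmdfs_remote_wait_opening_file() must be entered with info->fid_lock
 * held. ___wait_event() drops the lock before scheduling and retakes it
 * before rechecking the condition, so the caller still holds fid_lock on
 * return, whether the wait finished or was interrupted (-EINTR).
 */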

static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info,
				    struct file *filp)
{
	int err = 0;
	struct hmdfs_peer *conn = info->conn;
	struct inode *inode = NULL;
	struct hmdfs_fid fid;

	if (conn->status == NODE_STAT_OFFLINE)
		return -EAGAIN;

	spin_lock(&info->fid_lock);
	err = hmdfs_remote_wait_opening_file(info);
	if (err || !hmdfs_remote_need_reopen(info)) {
		spin_unlock(&info->fid_lock);
		goto out;
	}

	set_bit(HMDFS_FID_OPENING, &info->fid_flags);
	fid = info->fid;
	spin_unlock(&info->fid_lock);

	inode = &info->vfs_inode;
	inode_lock(inode);
	/*
	 * Most closing cases are meaningless, except for one:
	 *
	 * read process A        read process B
	 * err = -EBADF          err = -EBADF (caused by re-online)
	 * set_need_reopen
	 *                       do reopen
	 *                       fid = new fid_1 [server holds fid_1]
	 * set need_reopen
	 * do reopen
	 * send close (fid_1)    // in case of leak
	 * fid = new fid_2
	 */
	if (fid.id != HMDFS_INODE_INVALID_FILE_ID)
		hmdfs_send_close(conn, &fid);
	err = hmdfs_do_open_remote(filp, true);
	inode_unlock(inode);

	spin_lock(&info->fid_lock);
	/*
	 * This may lose a NEED_OPEN bit set by the offline handler, but the
	 * server will tell us whether or not the newly-opened file id was
	 * generated before it went offline: if so, operations on the file
	 * id will return -EBADF and the HMDFS_FID_NEED_OPEN bit will be set
	 * again.
	 */
	if (!err)
		clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
	clear_bit(HMDFS_FID_OPENING, &info->fid_flags);
	spin_unlock(&info->fid_lock);

	wake_up_interruptible_all(&info->fid_wq);
out:
	return err;
}

static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info,
					 struct file *filp)
{
	if (!hmdfs_remote_need_reopen(info))
		return 0;

	return hmdfs_remote_file_reopen(info, filp);
}

void hmdfs_do_close_remote(struct kref *kref)
{
	struct hmdfs_inode_info *info =
		container_of(kref, struct hmdfs_inode_info, ref);
	struct hmdfs_fid fid;

	hmdfs_remote_fetch_fid(info, &fid);
	/* This function can return asynchronously */
	hmdfs_send_close(info->conn, &fid);
}

static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi,
						fmode_t mode)
{
	return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE));
}

static void
hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info)
{
	WARN_ON(list_empty(&info->wr_opened_node));
	if (atomic_dec_and_test(&info->wr_opened_cnt))
		list_del_init(&info->wr_opened_node);
}

void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn,
				      struct hmdfs_inode_info *info)
{
	spin_lock(&conn->wr_opened_inode_lock);
	hmdfs_remote_del_wr_opened_inode_nolock(info);
	spin_unlock(&conn->wr_opened_inode_lock);
}

void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn,
					     struct hmdfs_inode_info *info)
{
	if (list_empty(&info->wr_opened_node)) {
		atomic_set(&info->wr_opened_cnt, 1);
		list_add_tail(&info->wr_opened_node,
			      &conn->wr_opened_inode_list);
	} else {
		atomic_inc(&info->wr_opened_cnt);
	}
}

static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn,
					     struct hmdfs_inode_info *info)
{
	spin_lock(&conn->wr_opened_inode_lock);
	hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
	spin_unlock(&conn->wr_opened_inode_lock);
}

int hmdfs_file_open_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct kref *ref = &(info->ref);
	int err = 0;

	inode_lock(inode);
	if (kref_read(ref) == 0) {
		err = hmdfs_do_open_remote(file, false);
		if (err == 0)
			kref_init(ref);
	} else {
		kref_get(ref);
	}
	inode_unlock(inode);

	if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb),
						 file->f_mode))
		hmdfs_remote_add_wr_opened_inode(info->conn, info);

	return err;
}
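
/*
 * The remote file handle is shared per inode (a summary of the pairing
 * above and in hmdfs_file_release_remote() below, not additional logic):
 * the first opener performs the real remote open and kref_init()s
 * info->ref, later openers only take a reference, and the final
 * kref_put() on release sends the remote close.
 */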

static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info,
					unsigned int seconds)
{
	unsigned long new_expire = jiffies + (unsigned long)seconds * HZ;

	/*
	 * When the file has been written before closing, set the pagecache
	 * expire time if it has not been set yet. This is necessary because
	 * ctime might stay the same after an overwrite.
	 */
	if (info->writecache_expire &&
	    time_after(new_expire, info->writecache_expire))
		return;

	info->writecache_expire = new_expire;
}

static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = NULL;
	struct kref *ref = NULL;
	struct hmdfs_getattr_ret *getattr_ret = NULL;
	unsigned int write_cache_timeout =
		hmdfs_sb(inode->i_sb)->write_cache_timeout;
	int err;

	if (!write_cache_timeout)
		return;

	info = hmdfs_i(inode);
	ref = &(info->ref);
	/*
	 * Don't do anything if the file is still open elsewhere or hasn't
	 * been written.
	 */
	if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter))
		return;

	/*
	 * If remote getattr fails and we don't update ctime, the pagecache
	 * will be truncated the next time the file is opened.
	 */
	err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0,
				   &getattr_ret);
	if (err) {
		hmdfs_err("remote getattr failed with err %d", err);
		return;
	}

	if (!(getattr_ret->stat.result_mask & STATX_CTIME)) {
		hmdfs_err("get remote ctime failed with mask 0x%x",
			  getattr_ret->stat.result_mask);
		kfree(getattr_ret);
		return;
	}
	/*
	 * Update ctime from remote, in case the pagecache would be
	 * truncated on the next open.
	 */
	inode->i_ctime = getattr_ret->stat.ctime;
	info->remote_ctime = getattr_ret->stat.ctime;
	hmdfs_set_writecache_expire(info, write_cache_timeout);
	kfree(getattr_ret);
}

int hmdfs_file_release_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode))
		hmdfs_remote_del_wr_opened_inode(info->conn, info);

	inode_lock(inode);
	kref_put(&info->ref, hmdfs_do_close_remote);
	hmdfs_remote_keep_writecache(inode, file);
	inode_unlock(inode);

	return 0;
}
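
/*
 * An example of the write-cache timer above (illustrative numbers): with
 * write_cache_timeout = 30, a file written and then fully closed at time
 * t keeps its pagecache for reopens until t + 30s. Within that window
 * hmdfs_open_final_remote() sees TIMER_WORKING and skips truncation;
 * after it, TIMER_EXPIRE drops the possibly-stale pages.
 */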

static int hmdfs_file_flush(struct file *file, fl_owner_t id)
{
	int err = 0;
	struct inode *inode = file_inode(file);

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	/*
	 * Continue regardless of whether the file reopen fails or not,
	 * because there may be no dirty page.
	 */
	hmdfs_remote_check_and_reopen(hmdfs_i(inode), file);

	/*
	 * Waiting for wsem here would greatly impact performance, so we
	 * overlap the time to issue as many wbs as we can, expecting async
	 * wbs to be eliminated afterwards.
	 */
	filemap_fdatawrite(inode->i_mapping);
	down_write(&hmdfs_i(inode)->wpage_sem);
	err = filemap_write_and_wait(inode->i_mapping);
	up_write(&hmdfs_i(inode)->wpage_sem);
	return err;
}

static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb,
					   struct iov_iter *iter)
{
	struct file *filp = iocb->ki_filp;
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
	struct file_ra_state *ra = NULL;
	unsigned int rtt;
	int err;
	bool tried = false;

retry:
	err = hmdfs_remote_check_and_reopen(info, filp);
	if (err)
		return err;

	ra = &filp->f_ra;
	/* rtt is measured in units of 10 msecs */
	rtt = hmdfs_tcpi_rtt(info->conn) / 10000;
	switch (rtt) {
	case 0:
		break;
	case 1:
		ra->ra_pages = 256;
		break;
	case 2:
		ra->ra_pages = 512;
		break;
	default:
		ra->ra_pages = 1024;
		break;
	}

	err = generic_file_read_iter(iocb, iter);
	if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) {
		/* Read from a stale fid, try to read again once. */
		tried = true;
		goto retry;
	}

	return err;
}

static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info,
					    bool check_stash)
{
	return (check_stash && hmdfs_inode_is_stashing(info)) ||
	       !hmdfs_is_node_online(info->conn);
}

static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb,
					      struct iov_iter *iter,
					      bool check_stash)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = file_inode(filp);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	ssize_t ret;

	if (hmdfs_is_file_unwritable(info, check_stash))
		return -EAGAIN;

	ret = hmdfs_remote_check_and_reopen(info, filp);
	if (ret)
		return ret;

	inode_lock(inode);
	if (hmdfs_is_file_unwritable(info, check_stash)) {
		ret = -EAGAIN;
		goto out;
	}
	ret = generic_write_checks(iocb, iter);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, iter);
out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb,
					     struct iov_iter *iter)
{
	return __hmdfs_file_write_iter_remote(iocb, iter, false);
}

static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb,
					    struct iov_iter *iter)
{
	return __hmdfs_file_write_iter_remote(iocb, iter, true);
}

/* hmdfs does not support mmap write on remote files */
static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct hmdfs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = hmdfs_page_mkwrite,
};

static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &hmdfs_file_vm_ops;
	file_accessed(file);

	return 0;
}
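
/*
 * The remote fsync below is two-phase (a summary of the function body,
 * not additional logic): first flush local dirty pages in [start, end]
 * through the writepage path, then ask the remote side to fsync its
 * backing file, so success means both copies reached their backing
 * stores.
 */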

static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end,
				   int datasync)
{
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
	struct hmdfs_peer *conn = info->conn;
	struct hmdfs_fid fid;
	int err;

	trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id,
				       info->remote_ino, datasync);
	/*
	 * Continue regardless of whether the file reopen fails or not,
	 * because there may be no dirty page.
	 */
	hmdfs_remote_check_and_reopen(info, file);

	filemap_fdatawrite(file->f_mapping);
	down_write(&info->wpage_sem);
	err = file_write_and_wait_range(file, start, end);
	up_write(&info->wpage_sem);
	if (err) {
		hmdfs_err("local fsync fail with %d", err);
		goto out;
	}

	hmdfs_remote_fetch_fid(info, &fid);
	err = hmdfs_send_fsync(conn, &fid, start, end, datasync);
	if (err)
		hmdfs_err("send fsync fail with %d", err);

out:
	trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id,
				      info->remote_ino,
				      get_cmd_timeout(conn->sbi, F_FSYNC), err);

	/* Compatible with POSIX retcode */
	if (err == -ETIME)
		err = -EIO;

	return err;
}

const struct file_operations hmdfs_dev_file_fops_remote = {
	.owner = THIS_MODULE,
	.llseek = generic_file_llseek,
	.read_iter = hmdfs_file_read_iter_remote,
	.write_iter = hmdfs_file_write_iter_remote,
	.mmap = hmdfs_file_mmap_remote,
	.open = hmdfs_file_open_remote,
	.release = hmdfs_file_release_remote,
	.flush = hmdfs_file_flush,
	.fsync = hmdfs_file_fsync_remote,
	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
};

static void hmdfs_fill_page_zero(struct page *page)
{
	void *addr = NULL;

	addr = kmap(page);
	memset(addr, 0, PAGE_SIZE);
	kunmap(page);
	SetPageUptodate(page);
	unlock_page(page);
}

static int hmdfs_readpage_remote(struct file *file, struct page *page)
{
	struct inode *inode = file_inode(file);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index = (isize - 1) >> PAGE_SHIFT;
	struct hmdfs_fid fid;

	if (!isize || page->index > end_index) {
		hmdfs_fill_page_zero(page);
		return 0;
	}

	hmdfs_remote_fetch_fid(info, &fid);
	return hmdfs_client_readpage(info->conn, &fid, page);
}

uint32_t hmdfs_get_writecount(struct page *page)
{
	uint32_t count = 0;
	loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET;
	struct inode *inode = page->mapping->host;
	loff_t size = i_size_read(inode);

	/*
	 * If the page offset is beyond i_size, which is possible when
	 * writepage races with truncate, we don't need to do the remote
	 * writepage since the page will be truncated after it is unlocked.
	 */
	if (pos >= size)
		count = 0;
	/*
	 * If the page about to be written crosses i_size, we can't write
	 * beyond i_size or the remote file size would be wrong.
	 */
	else if (size < pos + HMDFS_PAGE_SIZE)
		count = size - pos;
	/* It's safe to write the whole page */
	else
		count = HMDFS_PAGE_SIZE;

	return count;
}

static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info,
				   bool *rsem_held, bool sync_all)
{
	WARN_ON(!rsem_held);

	if (sync_all) {
		*rsem_held = false;
		return true;
	}
	*rsem_held = down_read_trylock(&info->wpage_sem);
	return *rsem_held;
}
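
/*
 * A worked example for hmdfs_get_writecount() above, assuming 4K pages
 * (HMDFS_PAGE_SIZE == 4096; illustrative numbers): with i_size = 10000,
 * the page at index 2 covers bytes [8192, 12288), so pos = 8192 < 10000
 * and count = 10000 - 8192 = 1808; any page at index >= 3 starts beyond
 * i_size and gets count = 0.
 */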

/**
 * hmdfs_writepage_remote - writeback a dirty page to remote
 *
 * INFO:
 * When asked to WB_SYNC_ALL, this function should leave with both the page
 * and the radix tree node clean to achieve close-to-open consistency.
 * Moreover, it shall never return -EIO, to help filemap iterate all dirty
 * pages.
 *
 * INFO:
 * When asked to WB_SYNC_NONE, this function should be merciful if faults
 * (oom or bad pipe) happened, to enable subsequent r/w & wb.
 */
static int hmdfs_writepage_remote(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb);
	int ret = 0;
	bool rsem_held = false;
	bool sync = wbc->sync_mode == WB_SYNC_ALL;
	struct hmdfs_writepage_context *param = NULL;

	if (!allow_cur_thread_wpage(info, &rsem_held, sync))
		goto out_unlock;

	set_page_writeback(page);

	param = kzalloc(sizeof(*param), GFP_NOFS);
	if (!param) {
		ret = -ENOMEM;
		goto out_endwb;
	}

	if (sync && hmdfs_usr_sig_pending(current)) {
		ClearPageUptodate(page);
		goto out_free;
	}
	param->count = hmdfs_get_writecount(page);
	if (!param->count)
		goto out_free;
	param->rsem_held = rsem_held;
	hmdfs_remote_fetch_fid(info, &param->fid);
	param->sync_all = sync;
	param->caller = current;
	get_task_struct(current);
	param->page = page;
	param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms);
	INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry);
	ret = hmdfs_remote_do_writepage(info->conn, param);
	if (likely(!ret))
		return 0;

	put_task_struct(current);
out_free:
	kfree(param);
out_endwb:
	end_page_writeback(page);
	if (rsem_held)
		up_read(&info->wpage_sem);
out_unlock:
	if (sync || !hmdfs_need_redirty_page(info, ret)) {
		SetPageError(page);
		mapping_set_error(page->mapping, ret);
	} else {
		redirty_page_for_writepage(wbc, page);
	}
	unlock_page(page);
	return ret;
}

static void hmdfs_account_dirty_pages(struct address_space *mapping)
{
	struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;

	if (!sbi->h_wb->dirty_writeback_control)
		return;

	this_cpu_inc(*sbi->h_wb->bdp_ratelimits);
}
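
/*
 * hmdfs_write_begin_remote() below follows the usual partial-write
 * pattern (a summary of the function body, not additional logic): a
 * full-page or already-uptodate target proceeds directly; a page-aligned
 * write that reaches past i_size only needs its tail zeroed; anything
 * else must first read the page from remote and retry with the
 * now-uptodate page.
 */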

static int hmdfs_write_begin_remote(struct file *file,
				    struct address_space *mapping, loff_t pos,
				    unsigned int len, unsigned int flags,
				    struct page **pagep, void **fsdata)
{
	pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT;
	struct inode *inode = file_inode(file);
	struct page *page = NULL;
	int ret = 0;

start:
	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	wait_on_page_writeback(page);

	// If this page will be covered completely.
	if (len == HMDFS_PAGE_SIZE || PageUptodate(page))
		return 0;

	/*
	 * If the data existing in this page will be covered completely,
	 * we just need to zero the remainder of the page.
	 */
	if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) &&
	    (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, HMDFS_PAGE_SIZE);
		return 0;
	}

	/* We need to read the page before writing data to it. */
	ret = hmdfs_readpage_remote(file, page);
	if (!ret) {
		if (PageLocked(page)) {
			ret = __lock_page_killable(page);
			if (!ret)
				unlock_page(page);
		}

		if (!ret && PageUptodate(page)) {
			put_page(page);
			goto start;
		}
		if (!ret)
			ret = -EIO;
	}
	put_page(page);
	return ret;
}

static int hmdfs_write_end_remote(struct file *file,
				  struct address_space *mapping, loff_t pos,
				  unsigned int len, unsigned int copied,
				  struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	if (!PageDirty(page)) {
		hmdfs_account_dirty_pages(mapping);
		set_page_dirty(page);
	}

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	}
unlock_out:
	unlock_page(page);
	put_page(page);

	/* hmdfs private writeback control */
	hmdfs_balance_dirty_pages_ratelimited(mapping);
	return copied;
}

const struct address_space_operations hmdfs_dev_file_aops_remote = {
	.readpage = hmdfs_readpage_remote,
	.write_begin = hmdfs_write_begin_remote,
	.write_end = hmdfs_write_end_remote,
	.writepage = hmdfs_writepage_remote,
	.set_page_dirty = __set_page_dirty_nobuffers,
};

loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id,
		     unsigned long offset)
{
	loff_t pos;

	pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) +
	      ((loff_t)group_id << OFFSET_BIT_NUM) + offset;
	if (dev_id)
		pos |= ((loff_t)1 << (POS_BIT_NUM - 1));
	return pos;
}
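
/*
 * Layout of the readdir position built above (derived from the shifts,
 * assuming the field widths sum to POS_BIT_NUM): the top bit flags a
 * remote entry, the next DEV_ID_BIT_NUM bits hold dev_id, the following
 * GROUP_ID_BIT_NUM bits hold the dentry-group index, and the low
 * OFFSET_BIT_NUM bits hold the slot offset within the group.
 * analysis_dentry_file_from_con() below unpacks ctx->pos with the
 * matching shifts.
 */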

int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi,
				  struct file *file,
				  struct file *handler,
				  struct dir_context *ctx)
{
	struct hmdfs_dentry_group *dentry_group = NULL;
	loff_t pos = ctx->pos;
	unsigned long dev_id = (unsigned long)((pos << 1) >>
					       (POS_BIT_NUM - DEV_ID_BIT_NUM));
	unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >>
						 (POS_BIT_NUM - GROUP_ID_BIT_NUM));
	loff_t offset = pos & OFFSET_BIT_MASK;
	int group_num = 0;
	char *dentry_name = NULL;
	int iterate_result = 0;
	int i, j;

	dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
	if (!dentry_group)
		return -ENOMEM;

	if (IS_ERR_OR_NULL(handler)) {
		kfree(dentry_group);
		return -ENOENT;
	}

	group_num = get_dentry_group_cnt(file_inode(handler));
	dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
	if (!dentry_name) {
		kfree(dentry_group);
		return -ENOMEM;
	}

	for (i = group_id; i < group_num; i++) {
		int ret = hmdfs_metainfo_read(sbi, handler, dentry_group,
					      sizeof(struct hmdfs_dentry_group),
					      i);
		if (ret != sizeof(struct hmdfs_dentry_group)) {
			hmdfs_err("read dentry group failed ret:%d", ret);
			goto done;
		}

		for (j = offset; j < DENTRY_PER_GROUP; j++) {
			int len;
			int file_type = DT_UNKNOWN;
			bool is_continue;

			len = le16_to_cpu(dentry_group->nsl[j].namelen);
			if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
				continue;

			memset(dentry_name, 0, DENTRY_NAME_MAX_LEN);
			// TODO: Support more file_type
			if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
				file_type = DT_DIR;
			else if (S_ISREG(le16_to_cpu(
					 dentry_group->nsl[j].i_mode)))
				file_type = DT_REG;
			else if (S_ISLNK(le16_to_cpu(
					 dentry_group->nsl[j].i_mode)))
				file_type = DT_LNK;

			strncat(dentry_name, dentry_group->filename[j], len);
			pos = hmdfs_set_pos(dev_id, i, j);
			is_continue =
				dir_emit(ctx, dentry_name, len,
					 pos + INUNUMBER_START, file_type);
			if (!is_continue) {
				ctx->pos = pos;
				iterate_result = 1;
				goto done;
			}
		}
		offset = 0;
	}

done:
	kfree(dentry_name);
	kfree(dentry_group);
	return iterate_result;
}

int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
			       struct dir_context *ctx)
{
	return analysis_dentry_file_from_con(con->sbi, file,
					     file->private_data, ctx);
}

static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx)
{
	int err = 0;
	loff_t start_pos = ctx->pos;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry);
	bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1));
	uint64_t dev_id = di->device_id;

	if (ctx->pos == -1)
		return 0;
	if (is_local)
		ctx->pos = hmdfs_set_pos(dev_id, 0, 0);

	con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id);
	if (con) {
		err = hmdfs_dev_readdir_from_con(con, file, ctx);
		peer_put(con);
	}

	if (err <= 0)
		ctx->pos = -1;

	trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos,
				   err);
	return err;
}

int hmdfs_dir_open_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct clearcache_item *cache_item = NULL;

	if (!info->conn)
		return -ENOENT;

	if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time),
				    info->conn->device_id,
				    file->f_path.dentry))
		get_remote_dentry_file_sync(file->f_path.dentry, info->conn);
	cache_item = hmdfs_find_cache_item(info->conn->device_id,
					   file->f_path.dentry);
	if (cache_item) {
		file->private_data = cache_item->filp;
		get_file(file->private_data);
		kref_put(&cache_item->ref, release_cache_item);
		return 0;
	}
	return -ENOENT;
}

static int hmdfs_dir_release_remote(struct inode *inode, struct file *file)
{
	if (file->private_data)
		fput(file->private_data);
	file->private_data = NULL;
	return 0;
}

const struct file_operations hmdfs_dev_dir_ops_remote = {
	.owner = THIS_MODULE,
	.iterate = hmdfs_iterate_remote,
	.open = hmdfs_dir_open_remote,
	.release = hmdfs_dir_release_remote,
	.fsync = __generic_file_fsync,
};