// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/file_remote.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include <linux/backing-dev.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include "file_remote.h"

#include "comm/socket_adapter.h"
#include "hmdfs.h"
#include "hmdfs_client.h"
#include "hmdfs_dentryfile.h"
#include "hmdfs_trace.h"

static inline bool hmdfs_remote_write_cache_expired(
	struct hmdfs_inode_info *info)
{
	return time_after(jiffies, info->writecache_expire);
}

enum expire_reason {
	ALL_GOOD = 0,
	INO_DISMATCH = 1,
	SIZE_OR_CTIME_DISMATCH = 2,
	TIMER_EXPIRE = 3,
	TIMER_WORKING = 4,
	STABLE_CTIME_DISMATCH = 5,
	KEEP_CACHE = 6,
};
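
/*
 * Summary of how each expire_reason above is handled at open time
 * (see hmdfs_open_final_remote() below):
 *
 *   INO_DISMATCH           - local inode is stale, fail with -ESTALE
 *   SIZE_OR_CTIME_DISMATCH - refresh ctime and drop the pagecache
 *   TIMER_EXPIRE           - write-cache timer expired, drop the pagecache
 *   TIMER_WORKING          - write-cache timer still running, keep the cache
 *   STABLE_CTIME_DISMATCH  - stable ctime unset or changed, drop the pagecache
 *   KEEP_CACHE             - caller asked to keep local cache and i_size
 */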

/*
 * hmdfs_open_final_remote - Do final steps of opening a remote file: update
 * the local inode cache and decide whether or not to truncate inode pages.
 *
 * @info: hmdfs inode info
 * @open_ret: values returned from remote when opening the remote file
 * @file: file to be opened
 * @keep_cache: keep local cache & i_size
 */
static int hmdfs_open_final_remote(struct hmdfs_inode_info *info,
				   struct hmdfs_open_ret *open_ret,
				   struct file *file, bool keep_cache)
{
	struct inode *inode = &info->vfs_inode;
	bool truncate = false;
	enum expire_reason reason = ALL_GOOD;
	int ret = 0;

	/*
	 * If the remote inode number changed, the lookup hit stale data:
	 * return -ESTALE so that the file is reopened with metadata from a
	 * remote getattr.
	 */
	if (info->remote_ino != open_ret->ino) {
		hmdfs_debug(
			"got stale local inode, ino in local %llu, ino from open %llu",
			info->remote_ino, open_ret->ino);
		hmdfs_send_close(info->conn, &open_ret->fid);
		reason = INO_DISMATCH;
		ret = -ESTALE;
		goto out;
	}

	if (keep_cache) {
		reason = KEEP_CACHE;
		trace_hmdfs_open_final_remote(info, open_ret, file, reason);
		goto set_fid_out;
	}

	/*
	 * Truncate if the remote size does not match the local inode, or the
	 * remote ctime does not match the one recorded the last time this
	 * file was opened.
	 */
	if (inode->i_size != open_ret->file_size ||
	    hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) {
		truncate = true;
		reason = SIZE_OR_CTIME_DISMATCH;
		goto out;
	}

	/*
	 * If 'writecache_expire' is set, check whether it has expired and
	 * skip the stable_ctime check.
	 */
	if (info->writecache_expire) {
		truncate = hmdfs_remote_write_cache_expired(info);
		if (truncate)
			reason = TIMER_EXPIRE;
		else
			reason = TIMER_WORKING;
		goto out;
	}

	/* the first open, or remote ctime was too new to be considered stable */
	if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) {
		truncate = true;
		reason = STABLE_CTIME_DISMATCH;
		goto out;
	}

	/*
	 * - if last stable_ctime == stable_ctime, we do nothing.
	 *   a. if ctime < stable_ctime, data is ensured to be uptodate,
	 *   b. if ctime == stable_ctime, stale data might be accessed. This is
	 *      acceptable since the pagecache will be dropped later.
	 *   c. ctime > stable_ctime is impossible.
	 * - if last stable_ctime < stable_ctime, we clear the cache.
	 *   d. ctime != last stable_ctime is impossible,
	 *   e. ctime == last stable_ctime, it is possible to read stale data
	 *      again as in b, thus we need to drop the cache.
	 * - if last stable_ctime > stable_ctime, we clear the cache.
	 *   stable_ctime must be zero in this case, which is possible because
	 *   the system time might have been changed.
	 */
	if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) {
		truncate = true;
		reason = STABLE_CTIME_DISMATCH;
		goto out;
	}

out:
	trace_hmdfs_open_final_remote(info, open_ret, file, reason);
	if (ret)
		return ret;

	if (reason == SIZE_OR_CTIME_DISMATCH) {
		inode_set_ctime_to_ts(inode, open_ret->remote_ctime);
		info->remote_ctime = open_ret->remote_ctime;
	}

	if (truncate) {
		info->writecache_expire = 0;
		truncate_inode_pages(inode->i_mapping, 0);
	}

	atomic64_set(&info->write_counter, 0);
	info->stable_ctime = open_ret->stable_ctime;
	i_size_write(inode, open_ret->file_size);
	info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
set_fid_out:
	spin_lock(&info->fid_lock);
	info->fid = open_ret->fid;
	spin_unlock(&info->fid_lock);
	return 0;
}

int hmdfs_do_open_remote(struct file *file, bool keep_cache)
{
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
	struct hmdfs_peer *conn = info->conn;
	struct hmdfs_open_ret open_ret;
	__u8 file_type = hmdfs_d(file->f_path.dentry)->file_type;
	char *send_buf;
	int err = 0;

	send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry);
	if (!send_buf) {
		err = -ENOMEM;
		goto out_free;
	}
	err = hmdfs_send_open(conn, send_buf, file_type, &open_ret);
	if (err) {
		hmdfs_err("hmdfs_send_open return failed with %d", err);
		goto out_free;
	}

	err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache);

out_free:
	kfree(send_buf);
	return err;
}
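
/*
 * Remote fid state machine:
 *
 * HMDFS_FID_NEED_OPEN is set (e.g. by the offline handler or on -EBADF from
 * the server after re-online) when the cached fid can no longer be trusted
 * and the file must be reopened; HMDFS_FID_OPENING marks a reopen in flight
 * so that concurrent users wait on fid_wq instead of issuing duplicate
 * opens. The bits and the fid itself are manipulated under fid_lock.
 */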

static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info)
{
	return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
}

static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info)
{
	return test_bit(HMDFS_FID_OPENING, &info->fid_flags);
}

/* Wait until a concurrent reopen finishes; called with fid_lock held. */
static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info)
{
	int err;

	if (!hmdfs_remote_is_opening_file(info))
		return 0;

	err = ___wait_event(info->fid_wq, !hmdfs_remote_is_opening_file(info),
			    TASK_INTERRUPTIBLE, 0, 0,
			    spin_unlock(&info->fid_lock);
			    schedule();
			    spin_lock(&info->fid_lock));
	if (err)
		err = -EINTR;

	return err;
}

static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info,
				    struct file *filp)
{
	int err = 0;
	struct hmdfs_peer *conn = info->conn;
	struct inode *inode = NULL;
	struct hmdfs_fid fid;

	if (conn->status == NODE_STAT_OFFLINE)
		return -EAGAIN;

	spin_lock(&info->fid_lock);
	err = hmdfs_remote_wait_opening_file(info);
	if (err || !hmdfs_remote_need_reopen(info)) {
		spin_unlock(&info->fid_lock);
		goto out;
	}

	set_bit(HMDFS_FID_OPENING, &info->fid_flags);
	fid = info->fid;
	spin_unlock(&info->fid_lock);

	inode = &info->vfs_inode;
	inode_lock(inode);
	/*
	 * Most closing cases are meaningless, except for one:
	 *
	 *     read process A        read process B
	 *     err = -EBADF          err = -EBADF (caused by re-online)
	 *     set_need_reopen
	 *                           do reopen
	 *                               fid = new fid_1 [server holds fid_1]
	 *     set need_reopen
	 *     do reopen
	 *         send close (fid_1) // in case of a leak
	 *         fid = new fid_2
	 */
	if (fid.id != HMDFS_INODE_INVALID_FILE_ID)
		hmdfs_send_close(conn, &fid);
	err = hmdfs_do_open_remote(filp, true);
	inode_unlock(inode);

	spin_lock(&info->fid_lock);
	/*
	 * This may lose a bit set by the offline handler, but the server
	 * will tell us whether or not the newly-opened file id was generated
	 * before going offline: if so, operations on that file id will
	 * return -EBADF and HMDFS_FID_NEED_OPEN will be set again.
	 */
	if (!err)
		clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
	clear_bit(HMDFS_FID_OPENING, &info->fid_flags);
	spin_unlock(&info->fid_lock);

	wake_up_interruptible_all(&info->fid_wq);
out:
	return err;
}

static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info,
					 struct file *filp)
{
	if (!hmdfs_remote_need_reopen(info))
		return 0;

	return hmdfs_remote_file_reopen(info, filp);
}

void hmdfs_do_close_remote(struct kref *kref)
{
	struct hmdfs_inode_info *info =
		container_of(kref, struct hmdfs_inode_info, ref);
	struct hmdfs_fid fid;

	hmdfs_remote_fetch_fid(info, &fid);
	/* hmdfs_send_close() may complete asynchronously */
	hmdfs_send_close(info->conn, &fid);
}

static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi,
						fmode_t mode)
{
	return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE));
}

static void
hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info)
{
	WARN_ON(list_empty(&info->wr_opened_node));
	if (atomic_dec_and_test(&info->wr_opened_cnt))
		list_del_init(&info->wr_opened_node);
}

void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn,
				      struct hmdfs_inode_info *info)
{
	spin_lock(&conn->wr_opened_inode_lock);
	hmdfs_remote_del_wr_opened_inode_nolock(info);
	spin_unlock(&conn->wr_opened_inode_lock);
}

void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn,
					     struct hmdfs_inode_info *info)
{
	if (list_empty(&info->wr_opened_node)) {
		atomic_set(&info->wr_opened_cnt, 1);
		list_add_tail(&info->wr_opened_node,
			      &conn->wr_opened_inode_list);
	} else {
		atomic_inc(&info->wr_opened_cnt);
	}
}

static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn,
					     struct hmdfs_inode_info *info)
{
	spin_lock(&conn->wr_opened_inode_lock);
	hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
	spin_unlock(&conn->wr_opened_inode_lock);
}
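
/*
 * The remote file is opened once per inode rather than once per struct
 * file: the first opener (kref == 0) performs the remote open and
 * initializes the refcount, later openers only take a reference, and the
 * last fput drops the final reference in hmdfs_file_release_remote(),
 * which sends the remote close via hmdfs_do_close_remote().
 */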
int hmdfs_file_open_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct kref *ref = &(info->ref);
	int err = 0;

	inode_lock(inode);
	if (kref_read(ref) == 0) {
		err = hmdfs_do_open_remote(file, false);
		if (err == 0)
			kref_init(ref);
	} else {
		kref_get(ref);
	}
	inode_unlock(inode);

	if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb),
						 file->f_mode))
		hmdfs_remote_add_wr_opened_inode(info->conn, info);

	return err;
}

static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info,
					unsigned int seconds)
{
	unsigned long new_expire = jiffies + (unsigned long)seconds * HZ;

	/*
	 * When the file has been written before closing, set the pagecache
	 * expiry if it has not been set yet, but never push an existing
	 * expiry further into the future. This is necessary because ctime
	 * might stay the same after an overwrite.
	 */
	if (info->writecache_expire &&
	    time_after(new_expire, info->writecache_expire))
		return;

	info->writecache_expire = new_expire;
}

static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = NULL;
	struct kref *ref = NULL;
	struct hmdfs_getattr_ret *getattr_ret = NULL;
	unsigned int write_cache_timeout =
		hmdfs_sb(inode->i_sb)->write_cache_timeout;
	int err;

	if (!write_cache_timeout)
		return;

	info = hmdfs_i(inode);
	ref = &(info->ref);
	/*
	 * Don't do anything if the file is still open elsewhere or has not
	 * been written.
	 */
	if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter))
		return;

	/*
	 * If remote getattr fails and we don't update ctime, the pagecache
	 * will be truncated the next time the file is opened.
	 */
	err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0,
				   &getattr_ret);
	if (err) {
		hmdfs_err("remote getattr failed with err %d", err);
		return;
	}

	if (!(getattr_ret->stat.result_mask & STATX_CTIME)) {
		hmdfs_err("get remote ctime failed with mask 0x%x",
			  getattr_ret->stat.result_mask);
		kfree(getattr_ret);
		return;
	}
	/*
	 * Update ctime from the remote so that the pagecache won't be
	 * truncated on the next open.
	 */
	inode_set_ctime_to_ts(inode, getattr_ret->stat.ctime);
	info->remote_ctime = getattr_ret->stat.ctime;
	hmdfs_set_writecache_expire(info, write_cache_timeout);
	kfree(getattr_ret);
}

int hmdfs_file_release_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode))
		hmdfs_remote_del_wr_opened_inode(info->conn, info);

	inode_lock(inode);
	kref_put(&info->ref, hmdfs_do_close_remote);
	hmdfs_remote_keep_writecache(inode, file);
	inode_unlock(inode);

	return 0;
}

static int hmdfs_file_flush(struct file *file, fl_owner_t id)
{
	int err = 0;
	struct inode *inode = file_inode(file);

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	/*
	 * Continue whether or not the file reopen fails, because there may
	 * be no dirty pages at all.
	 */
	hmdfs_remote_check_and_reopen(hmdfs_i(inode), file);

	/*
	 * Waiting on wpage_sem before issuing writeback would hurt
	 * performance badly, so kick off writeback first to issue as many
	 * write requests as possible, expecting the async writebacks to be
	 * mostly finished by the time the semaphore is taken.
	 */
	filemap_fdatawrite(inode->i_mapping);
	down_write(&hmdfs_i(inode)->wpage_sem);
	err = filemap_write_and_wait(inode->i_mapping);
	up_write(&hmdfs_i(inode)->wpage_sem);
	return err;
}
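
/*
 * Remote read path: readahead is scaled with the measured TCP RTT (the
 * slower the link, the larger the readahead window), and a read that
 * fails on a stale fid is retried exactly once after reopening the file.
 */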
static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb,
					   struct iov_iter *iter)
{
	struct file *filp = iocb->ki_filp;
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
	struct file_ra_state *ra = NULL;
	unsigned int rtt;
	ssize_t err;
	bool tried = false;

retry:
	err = hmdfs_remote_check_and_reopen(info, filp);
	if (err)
		return err;

	ra = &filp->f_ra;
	/* rtt is measured in units of 10 msecs */
	rtt = hmdfs_tcpi_rtt(info->conn) / 10000;
	switch (rtt) {
	case 0:
		break;
	case 1:
		ra->ra_pages = 256;
		break;
	case 2:
		ra->ra_pages = 512;
		break;
	default:
		ra->ra_pages = 1024;
		break;
	}

	err = generic_file_read_iter(iocb, iter);
	if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) {
		/* Read from a stale fid, try the read again once. */
		tried = true;
		goto retry;
	}

	return err;
}

static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info,
					    bool check_stash)
{
	return (check_stash && hmdfs_inode_is_stashing(info)) ||
	       !hmdfs_is_node_online(info->conn);
}

static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb,
					      struct iov_iter *iter,
					      bool check_stash)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = file_inode(filp);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	ssize_t ret;

	if (hmdfs_is_file_unwritable(info, check_stash))
		return -EAGAIN;

	ret = hmdfs_remote_check_and_reopen(info, filp);
	if (ret)
		return ret;

	inode_lock(inode);
	/* Recheck under the lock, the peer may have gone offline meanwhile. */
	if (hmdfs_is_file_unwritable(info, check_stash)) {
		ret = -EAGAIN;
		goto out;
	}
	ret = generic_write_checks(iocb, iter);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, iter);
out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb,
					     struct iov_iter *iter)
{
	return __hmdfs_file_write_iter_remote(iocb, iter, false);
}

static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb,
					    struct iov_iter *iter)
{
	return __hmdfs_file_write_iter_remote(iocb, iter, true);
}

/* hmdfs does not support mmap write to remote files */
static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct hmdfs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = hmdfs_page_mkwrite,
};

static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &hmdfs_file_vm_ops;
	file_accessed(file);

	return 0;
}

static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end,
				   int datasync)
{
	struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
	struct hmdfs_peer *conn = info->conn;
	struct hmdfs_fid fid;
	int err;

	trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id,
				       info->remote_ino, datasync);
	/*
	 * Continue whether or not the file reopen fails, because there may
	 * be no dirty pages at all.
	 */
	hmdfs_remote_check_and_reopen(info, file);

	filemap_fdatawrite(file->f_mapping);
	down_write(&info->wpage_sem);
	err = file_write_and_wait_range(file, start, end);
	up_write(&info->wpage_sem);
	if (err) {
		hmdfs_err("local fsync fail with %d", err);
		goto out;
	}

	hmdfs_remote_fetch_fid(info, &fid);
	err = hmdfs_send_fsync(conn, &fid, start, end, datasync);
	if (err)
		hmdfs_err("send fsync fail with %d", err);

out:
	trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id,
				      info->remote_ino,
				      get_cmd_timeout(conn->sbi, F_FSYNC), err);

	/* Compatible with POSIX retcodes */
	if (err == -ETIME)
		err = -EIO;

	return err;
}

const struct file_operations hmdfs_dev_file_fops_remote = {
	.owner = THIS_MODULE,
	.llseek = generic_file_llseek,
	.read_iter = hmdfs_file_read_iter_remote,
	.write_iter = hmdfs_file_write_iter_remote,
	.mmap = hmdfs_file_mmap_remote,
	.open = hmdfs_file_open_remote,
	.release = hmdfs_file_release_remote,
	.flush = hmdfs_file_flush,
	.fsync = hmdfs_file_fsync_remote,
	.splice_read = copy_splice_read,
	.splice_write = iter_file_splice_write,
};
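
/*
 * Remote page reads: a read entirely beyond the remote EOF never hits the
 * network, the page is simply zero-filled and marked uptodate; everything
 * else is fetched from the peer via the cached fid.
 */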
static void hmdfs_fill_page_zero(struct page *page)
{
	void *addr = NULL;

	addr = kmap(page);
	memset(addr, 0, PAGE_SIZE);
	kunmap(page);
	SetPageUptodate(page);
	unlock_page(page);
}

static int hmdfs_readpage_remote(struct file *file, struct page *page)
{
	struct inode *inode = file_inode(file);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index = (isize - 1) >> PAGE_SHIFT;
	struct hmdfs_fid fid;

	if (!isize || page->index > end_index) {
		hmdfs_fill_page_zero(page);
		return 0;
	}

	hmdfs_remote_fetch_fid(info, &fid);
	return hmdfs_client_readpage(info->conn, &fid, page);
}

static int hmdfs_read_folio(struct file *file, struct folio *folio)
{
	return hmdfs_readpage_remote(file, &folio->page);
}

uint32_t hmdfs_get_writecount(struct page *page)
{
	uint32_t count = 0;
	loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET;
	struct inode *inode = page->mapping->host;
	loff_t size = i_size_read(inode);

	/*
	 * If the page offset is at or beyond i_size, which is possible when
	 * writepage races with truncate, there is no need to do a remote
	 * writepage since the page will be truncated after it is unlocked.
	 */
	if (pos >= size)
		count = 0;
	/*
	 * If the page to be written extends beyond i_size, write back only
	 * up to i_size, otherwise the remote file size would be wrong.
	 */
	else if (size < pos + HMDFS_PAGE_SIZE)
		count = size - pos;
	/* It's safe to write the whole page */
	else
		count = HMDFS_PAGE_SIZE;

	return count;
}
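
/*
 * Worked example for hmdfs_get_writecount(), assuming HMDFS_PAGE_SIZE is
 * 4096: with i_size = 10000, page 0 and page 1 are written back in full
 * (count = 4096), page 2 (pos = 8192) is written back partially
 * (count = 10000 - 8192 = 1808), and page 3 (pos = 12288 >= i_size) is
 * skipped (count = 0).
 */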
static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info,
				   bool *rsem_held, bool sync_all)
{
	WARN_ON(!rsem_held);

	if (sync_all) {
		*rsem_held = false;
		return true;
	}
	*rsem_held = down_read_trylock(&info->wpage_sem);
	return *rsem_held;
}

/**
 * hmdfs_writepage_remote - write back a dirty page to the remote
 *
 * INFO:
 * When asked to WB_SYNC_ALL, this function should leave with both the page
 * and the radix tree node clean to achieve close-to-open consistency.
 * Moreover, it shall never return -EIO, to help filemap iterate over all
 * dirty pages.
 *
 * INFO:
 * When asked to WB_SYNC_NONE, this function should be lenient if faults
 * (OOM or a bad pipe) happen, to enable subsequent r/w & wb.
 */
static int hmdfs_writepage_remote(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb);
	int ret = 0;
	bool rsem_held = false;
	bool sync = wbc->sync_mode == WB_SYNC_ALL;
	struct hmdfs_writepage_context *param = NULL;

	if (!allow_cur_thread_wpage(info, &rsem_held, sync))
		goto out_unlock;

	set_page_writeback(page);

	param = kzalloc(sizeof(*param), GFP_NOFS);
	if (!param) {
		ret = -ENOMEM;
		goto out_endwb;
	}

	if (sync && hmdfs_usr_sig_pending(current)) {
		ClearPageUptodate(page);
		goto out_free;
	}
	param->count = hmdfs_get_writecount(page);
	if (!param->count)
		goto out_free;
	param->rsem_held = rsem_held;
	hmdfs_remote_fetch_fid(info, &param->fid);
	param->sync_all = sync;
	param->caller = current;
	get_task_struct(current);
	param->page = page;
	param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms);
	INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry);
	ret = hmdfs_remote_do_writepage(info->conn, param);
	if (likely(!ret))
		return 0;

	put_task_struct(current);
out_free:
	kfree(param);
out_endwb:
	end_page_writeback(page);
	if (rsem_held)
		up_read(&info->wpage_sem);
out_unlock:
	if (sync || !hmdfs_need_redirty_page(info, ret)) {
		SetPageError(page);
		mapping_set_error(page->mapping, ret);
	} else {
		redirty_page_for_writepage(wbc, page);
	}
	unlock_page(page);
	return ret;
}

static void hmdfs_account_dirty_pages(struct address_space *mapping)
{
	struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;

	if (!sbi->h_wb->dirty_writeback_control)
		return;

	this_cpu_inc(*sbi->h_wb->bdp_ratelimits);
}

static int hmdfs_write_begin_remote(struct file *file,
				    struct address_space *mapping, loff_t pos,
				    unsigned int len,
				    struct page **pagep, void **fsdata)
{
	pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT;
	struct inode *inode = file_inode(file);
	struct page *page = NULL;
	int ret = 0;

start:
	page = grab_cache_page_write_begin(mapping, index);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	wait_on_page_writeback(page);

	/* No read needed if the page will be covered completely. */
	if (len == HMDFS_PAGE_SIZE || PageUptodate(page))
		return 0;

	/*
	 * If the data existing in this page will be completely covered,
	 * just zero the remainder of the page.
	 */
	if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) &&
	    (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, HMDFS_PAGE_SIZE);
		return 0;
	}
	/*
	 * Otherwise the page must be read before data is written to it.
	 */
	ret = hmdfs_readpage_remote(file, page);
	if (!ret) {
		/* Wait for the async read to unlock the page. */
		if (PageLocked(page)) {
			ret = folio_lock_killable(page_folio(page));
			if (!ret)
				unlock_page(page);
		}

		if (!ret && PageUptodate(page)) {
			put_page(page);
			goto start;
		}
		if (!ret)
			ret = -EIO;
	}
	put_page(page);
	return ret;
}

static int hmdfs_write_end_remote(struct file *file,
				  struct address_space *mapping, loff_t pos,
				  unsigned int len, unsigned int copied,
				  struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	if (!PageDirty(page)) {
		hmdfs_account_dirty_pages(mapping);
		set_page_dirty(page);
	}

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	}
unlock_out:
	unlock_page(page);
	put_page(page);

	/* hmdfs private writeback control */
	hmdfs_balance_dirty_pages_ratelimited(mapping);
	return copied;
}

const struct address_space_operations hmdfs_dev_file_aops_remote = {
	.read_folio = hmdfs_read_folio,
	.write_begin = hmdfs_write_begin_remote,
	.write_end = hmdfs_write_end_remote,
	.writepage = hmdfs_writepage_remote,
	.dirty_folio = filemap_dirty_folio,
};
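
/*
 * Layout of a remote readdir position, as encoded by hmdfs_set_pos() and
 * decoded again in analysis_dentry_file_from_con() (bit POS_BIT_NUM - 1
 * being the most significant):
 *
 *   | 1 bit: remote flag (set iff dev_id != 0) |
 *   | DEV_ID_BIT_NUM bits: dev_id              |
 *   | GROUP_ID_BIT_NUM bits: group_id          |
 *   | OFFSET_BIT_NUM bits: offset in the group |
 */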
loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id,
		     unsigned long offset)
{
	loff_t pos;

	pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) +
	      ((loff_t)group_id << OFFSET_BIT_NUM) + offset;
	if (dev_id)
		pos |= ((loff_t)1 << (POS_BIT_NUM - 1));
	return pos;
}

int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi,
				  struct file *file,
				  struct file *handler,
				  struct dir_context *ctx)
{
	struct hmdfs_dentry_group *dentry_group = NULL;
	loff_t pos = ctx->pos;
	unsigned long dev_id = (unsigned long)((pos << 1) >>
					       (POS_BIT_NUM - DEV_ID_BIT_NUM));
	unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >>
						 (POS_BIT_NUM - GROUP_ID_BIT_NUM));
	loff_t offset = pos & OFFSET_BIT_MASK;
	int group_num = 0;
	char *dentry_name = NULL;
	int iterate_result = 0;
	int i, j;

	dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
	if (!dentry_group)
		return -ENOMEM;

	if (IS_ERR_OR_NULL(handler)) {
		kfree(dentry_group);
		return -ENOENT;
	}

	group_num = get_dentry_group_cnt(file_inode(handler));
	dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
	if (!dentry_name) {
		kfree(dentry_group);
		return -ENOMEM;
	}

	for (i = group_id; i < group_num; i++) {
		int ret = hmdfs_metainfo_read(sbi, handler, dentry_group,
					      sizeof(struct hmdfs_dentry_group),
					      i);
		if (ret != sizeof(struct hmdfs_dentry_group)) {
			hmdfs_err("read dentry group failed ret:%d", ret);
			goto done;
		}

		for (j = offset; j < DENTRY_PER_GROUP; j++) {
			int len;
			int file_type = DT_UNKNOWN;
			bool is_continue;

			len = le16_to_cpu(dentry_group->nsl[j].namelen);
			if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
				continue;

			memset(dentry_name, 0, DENTRY_NAME_MAX_LEN);
			// TODO: Support more file types
			if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
				file_type = DT_DIR;
			else if (S_ISREG(le16_to_cpu(
					 dentry_group->nsl[j].i_mode)))
				file_type = DT_REG;
			else if (S_ISLNK(le16_to_cpu(
					 dentry_group->nsl[j].i_mode)))
				file_type = DT_LNK;

			strncat(dentry_name, dentry_group->filename[j], len);
			pos = hmdfs_set_pos(dev_id, i, j);
			is_continue =
				dir_emit(ctx, dentry_name, len,
					 pos + INUNUMBER_START, file_type);
			if (!is_continue) {
				ctx->pos = pos;
				iterate_result = 1;
				goto done;
			}
		}
		offset = 0;
	}

done:
	kfree(dentry_name);
	kfree(dentry_group);
	return iterate_result;
}

int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
			       struct dir_context *ctx)
{
	return analysis_dentry_file_from_con(con->sbi, file,
					     file->private_data, ctx);
}

static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx)
{
	int err = 0;
	loff_t start_pos = ctx->pos;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry);
	bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1));
	uint64_t dev_id = di->device_id;

	if (ctx->pos == -1)
		return 0;
	if (is_local)
		ctx->pos = hmdfs_set_pos(dev_id, 0, 0);

	con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id);
	if (con) {
		err = hmdfs_dev_readdir_from_con(con, file, ctx);
		peer_put(con);
	}

	if (err <= 0)
		ctx->pos = -1;

	trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos,
				   err);
	return err;
}

int hmdfs_dir_open_remote(struct inode *inode, struct file *file)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct clearcache_item *cache_item = NULL;

	if (!info->conn)
		return -ENOENT;

	if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time),
				    info->conn->device_id,
				    file->f_path.dentry))
		get_remote_dentry_file_sync(file->f_path.dentry, info->conn);
	cache_item = hmdfs_find_cache_item(info->conn->device_id,
					   file->f_path.dentry);
	if (cache_item) {
		file->private_data = cache_item->filp;
		get_file(file->private_data);
		kref_put(&cache_item->ref, release_cache_item);
		return 0;
	}
	return -ENOENT;
}

static int hmdfs_dir_release_remote(struct inode *inode, struct file *file)
{
	if (file->private_data)
		fput(file->private_data);
	file->private_data = NULL;
	return 0;
}

const struct file_operations hmdfs_dev_dir_ops_remote = {
	.owner = THIS_MODULE,
	.iterate_shared = hmdfs_iterate_remote,
	.open = hmdfs_dir_open_remote,
	.release = hmdfs_dir_release_remote,
	.fsync = __generic_file_fsync,
};