// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/file_remote.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include <linux/backing-dev.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include "file_remote.h"

#include "comm/socket_adapter.h"
#include "hmdfs.h"
#include "hmdfs_client.h"
#include "hmdfs_dentryfile.h"
#include "hmdfs_trace.h"

static inline bool hmdfs_remote_write_cache_expired(
        struct hmdfs_inode_info *info)
{
        return time_after(jiffies, info->writecache_expire);
}

enum expire_reason {
        ALL_GOOD = 0,
        INO_DISMATCH = 1,
        SIZE_OR_CTIME_DISMATCH = 2,
        TIMER_EXPIRE = 3,
        TIMER_WORKING = 4,
        STABLE_CTIME_DISMATCH = 5,
        KEEP_CACHE = 6,
};

/*
 * hmdfs_open_final_remote - Do the final steps of opening a remote file:
 * update the local inode cache and decide whether or not to truncate the
 * inode pages.
 *
 * @info: hmdfs inode info
 * @open_ret: values returned from the remote when opening a remote file
 * @keep_cache: keep local cache & i_size
 */
static int hmdfs_open_final_remote(struct hmdfs_inode_info *info,
                                   struct hmdfs_open_ret *open_ret,
                                   struct file *file, bool keep_cache)
{
        struct inode *inode = &info->vfs_inode;
        bool truncate = false;
        enum expire_reason reason = ALL_GOOD;
        int ret = 0;

        /*
         * If the remote inode number changed, we looked up stale data: return
         * -ESTALE and reopen the file with metadata from remote getattr.
         */
        if (info->remote_ino != open_ret->ino) {
                hmdfs_debug(
                        "got stale local inode, ino in local %llu, ino from open %llu",
                        info->remote_ino, open_ret->ino);
                hmdfs_send_close(info->conn, &open_ret->fid);
                reason = INO_DISMATCH;
                ret = -ESTALE;
                goto out;
        }

        if (keep_cache) {
                reason = KEEP_CACHE;
                trace_hmdfs_open_final_remote(info, open_ret, file, reason);
                goto set_fid_out;
        }

        /*
         * Check whether the remote size matches the local inode, and whether
         * the remote ctime matches the one recorded the last time this file
         * was opened.
         */
        if (inode->i_size != open_ret->file_size ||
            hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) {
                truncate = true;
                reason = SIZE_OR_CTIME_DISMATCH;
                goto out;
        }

        /*
         * If 'writecache_expire' is set, check whether it has expired and
         * skip the stable_ctime check.
         */
        if (info->writecache_expire) {
                truncate = hmdfs_remote_write_cache_expired(info);
                if (truncate)
                        reason = TIMER_EXPIRE;
                else
                        reason = TIMER_WORKING;
                goto out;
        }

        /* the first time, or the remote ctime is ahead of the remote time */
        if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) {
                truncate = true;
                reason = STABLE_CTIME_DISMATCH;
                goto out;
        }

        /*
         * - if last stable_ctime == stable_ctime, we do nothing.
         *   a. if ctime < stable_ctime, data is ensured to be uptodate,
         *   b. if ctime == stable_ctime, stale data might be accessed. This is
         *      acceptable since pagecache will be dropped later.
         *   c. ctime > stable_ctime is impossible.
         * - if last stable_ctime < stable_ctime, we clear the cache.
         *   d. ctime != last stable_ctime is impossible
         *   e. ctime == last stable_ctime, it is possible that stale data was
         *      read in case b, thus we need to drop the cache.
         * - if last stable_ctime > stable_ctime, we clear the cache.
         *   stable_ctime must be zero in this case, which is possible because
         *   the system time might have been changed.
         */
        if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) {
                truncate = true;
                reason = STABLE_CTIME_DISMATCH;
                goto out;
        }

out:
        trace_hmdfs_open_final_remote(info, open_ret, file, reason);
        if (ret)
                return ret;

        if (reason == SIZE_OR_CTIME_DISMATCH) {
                inode->__i_ctime = open_ret->remote_ctime;
                info->remote_ctime = open_ret->remote_ctime;
        }

        if (truncate) {
                info->writecache_expire = 0;
                truncate_inode_pages(inode->i_mapping, 0);
        }

        atomic64_set(&info->write_counter, 0);
        info->stable_ctime = open_ret->stable_ctime;
        i_size_write(inode, open_ret->file_size);
        info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;

set_fid_out:
        spin_lock(&info->fid_lock);
        info->fid = open_ret->fid;
        spin_unlock(&info->fid_lock);
        return 0;
}

int hmdfs_do_open_remote(struct file *file, bool keep_cache)
{
        struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
        struct hmdfs_peer *conn = info->conn;
        struct hmdfs_open_ret open_ret;
        __u8 file_type = hmdfs_d(file->f_path.dentry)->file_type;
        char *send_buf;
        int err = 0;

        send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry);
        if (!send_buf) {
                err = -ENOMEM;
                goto out_free;
        }
        err = hmdfs_send_open(conn, send_buf, file_type, &open_ret);
        if (err) {
                hmdfs_err("hmdfs_send_open return failed with %d", err);
                goto out_free;
        }

        err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache);

out_free:
        kfree(send_buf);
        return err;
}

static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info)
{
        return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
}

static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info)
{
        return test_bit(HMDFS_FID_OPENING, &info->fid_flags);
}
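/*
 * Wait until a concurrent opener has finished, i.e. until HMDFS_FID_OPENING
 * is cleared. Called with fid_lock held; the lock is dropped while sleeping
 * and reacquired before returning.
 */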
static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info)
{
        int err;

        if (!hmdfs_remote_is_opening_file(info))
                return 0;

        err = ___wait_event(info->fid_wq, !hmdfs_remote_is_opening_file(info),
                            TASK_INTERRUPTIBLE, 0, 0,
                            spin_unlock(&info->fid_lock);
                            schedule();
                            spin_lock(&info->fid_lock));
        if (err)
                err = -EINTR;

        return err;
}

static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info,
                                    struct file *filp)
{
        int err = 0;
        struct hmdfs_peer *conn = info->conn;
        struct inode *inode = NULL;
        struct hmdfs_fid fid;

        if (conn->status == NODE_STAT_OFFLINE)
                return -EAGAIN;

        spin_lock(&info->fid_lock);
        err = hmdfs_remote_wait_opening_file(info);
        if (err || !hmdfs_remote_need_reopen(info)) {
                spin_unlock(&info->fid_lock);
                goto out;
        }

        set_bit(HMDFS_FID_OPENING, &info->fid_flags);
        fid = info->fid;
        spin_unlock(&info->fid_lock);

        inode = &info->vfs_inode;
        inode_lock(inode);
        /*
         * Most closing cases are meaningless, except for one:
         *
         * read process A          read process B
         * err = -EBADF            err = -EBADF (caused by re-online)
         * set_need_reopen
         *                         do reopen
         *                         fid = new fid_1 [server holds fid_1]
         *                         set need_reopen
         * do reopen
         * send close (fid_1)      // in case of leak
         * fid = new fid_2
         */
        if (fid.id != HMDFS_INODE_INVALID_FILE_ID)
                hmdfs_send_close(conn, &fid);
        err = hmdfs_do_open_remote(filp, true);
        inode_unlock(inode);

        spin_lock(&info->fid_lock);
        /*
         * This may lose a bit set by the offline handler, but the server will
         * tell us whether or not the newly-opened file id was generated before
         * going offline. If it was, operations on the file id will return
         * -EBADF and the HMDFS_FID_NEED_OPEN bit will be set again.
         */
        if (!err)
                clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
        clear_bit(HMDFS_FID_OPENING, &info->fid_flags);
        spin_unlock(&info->fid_lock);
        wake_up_interruptible_all(&info->fid_wq);
out:
        return err;
}

static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info,
                                         struct file *filp)
{
        if (!hmdfs_remote_need_reopen(info))
                return 0;

        return hmdfs_remote_file_reopen(info, filp);
}

void hmdfs_do_close_remote(struct kref *kref)
{
        struct hmdfs_inode_info *info =
                container_of(kref, struct hmdfs_inode_info, ref);
        struct hmdfs_fid fid;

        hmdfs_remote_fetch_fid(info, &fid);
        /* This function can return asynchronously */
        hmdfs_send_close(info->conn, &fid);
}

static inline bool
hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi, fmode_t mode)
{
        return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE));
}

static void
hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info)
{
        WARN_ON(list_empty(&info->wr_opened_node));
        if (atomic_dec_and_test(&info->wr_opened_cnt))
                list_del_init(&info->wr_opened_node);
}

void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn,
                                      struct hmdfs_inode_info *info)
{
        spin_lock(&conn->wr_opened_inode_lock);
        hmdfs_remote_del_wr_opened_inode_nolock(info);
        spin_unlock(&conn->wr_opened_inode_lock);
}

void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn,
                                             struct hmdfs_inode_info *info)
{
        if (list_empty(&info->wr_opened_node)) {
                atomic_set(&info->wr_opened_cnt, 1);
                list_add_tail(&info->wr_opened_node,
                              &conn->wr_opened_inode_list);
        } else {
                atomic_inc(&info->wr_opened_cnt);
        }
}

static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn,
                                             struct hmdfs_inode_info *info)
{
        spin_lock(&conn->wr_opened_inode_lock);
        hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
        spin_unlock(&conn->wr_opened_inode_lock);
}
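/*
 * The inode's kref counts local openers: a remote open request is sent only
 * for the first opener, while later opens just take another reference. The
 * matching kref_put() in hmdfs_file_release_remote() triggers the remote
 * close once the last opener is gone.
 */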
int hmdfs_file_open_remote(struct inode *inode, struct file *file)
{
        struct hmdfs_inode_info *info = hmdfs_i(inode);
        struct kref *ref = &(info->ref);
        int err = 0;

        inode_lock(inode);
        if (kref_read(ref) == 0) {
                err = hmdfs_do_open_remote(file, false);
                if (err == 0)
                        kref_init(ref);
        } else {
                kref_get(ref);
        }
        inode_unlock(inode);

        if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb),
                                                 file->f_mode))
                hmdfs_remote_add_wr_opened_inode(info->conn, info);

        return err;
}

static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info,
                                        unsigned int seconds)
{
        unsigned long new_expire = jiffies + (unsigned long)seconds * HZ;

        /*
         * When the file has been written before closing, set the pagecache
         * expire timer if it has not been set yet. This is necessary because
         * ctime might stay the same after an overwrite.
         */
        if (info->writecache_expire &&
            time_after(new_expire, info->writecache_expire))
                return;

        info->writecache_expire = new_expire;
}

static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file)
{
        struct hmdfs_inode_info *info = NULL;
        struct kref *ref = NULL;
        struct hmdfs_getattr_ret *getattr_ret = NULL;
        unsigned int write_cache_timeout =
                hmdfs_sb(inode->i_sb)->write_cache_timeout;
        int err;

        if (!write_cache_timeout)
                return;

        info = hmdfs_i(inode);
        ref = &(info->ref);
        /*
         * Don't do anything if the file is still open or hasn't been
         * written to.
         */
        if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter))
                return;

        /*
         * If remote getattr fails and we don't update ctime, the pagecache
         * will be truncated the next time the file is opened.
         */
        err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0,
                                   &getattr_ret);
        if (err) {
                hmdfs_err("remote getattr failed with err %d", err);
                return;
        }

        if (!(getattr_ret->stat.result_mask & STATX_CTIME)) {
                hmdfs_err("get remote ctime failed with mask 0x%x",
                          getattr_ret->stat.result_mask);
                kfree(getattr_ret);
                return;
        }
        /*
         * Update ctime from the remote so that the pagecache won't be
         * truncated on the next open.
         */
        inode->__i_ctime = getattr_ret->stat.ctime;
        info->remote_ctime = getattr_ret->stat.ctime;
        hmdfs_set_writecache_expire(info, write_cache_timeout);
        kfree(getattr_ret);
}

int hmdfs_file_release_remote(struct inode *inode, struct file *file)
{
        struct hmdfs_inode_info *info = hmdfs_i(inode);

        if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode))
                hmdfs_remote_del_wr_opened_inode(info->conn, info);

        inode_lock(inode);
        kref_put(&info->ref, hmdfs_do_close_remote);
        hmdfs_remote_keep_writecache(inode, file);
        inode_unlock(inode);

        return 0;
}

static int hmdfs_file_flush(struct file *file, fl_owner_t id)
{
        int err = 0;
        struct inode *inode = file_inode(file);

        if (!(file->f_mode & FMODE_WRITE))
                return 0;

        /*
         * Continue regardless of whether the file reopen fails or not,
         * because there may be no dirty pages at all.
         */
        hmdfs_remote_check_and_reopen(hmdfs_i(inode), file);

        /*
         * Waiting for wsem here would greatly impact performance, so we
         * overlap the time to issue as many writebacks as we can, expecting
         * async writebacks to be eliminated afterwards.
         */
        filemap_fdatawrite(inode->i_mapping);
        down_write(&hmdfs_i(inode)->wpage_sem);
        err = filemap_write_and_wait(inode->i_mapping);
        up_write(&hmdfs_i(inode)->wpage_sem);

        return err;
}
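/*
 * Reopen the remote file if needed, scale the readahead window with the
 * measured TCP RTT, and retry once if the read failed on a stale fid.
 */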
static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb,
                                           struct iov_iter *iter)
{
        struct file *filp = iocb->ki_filp;
        struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
        struct file_ra_state *ra = NULL;
        unsigned int rtt;
        int err;
        bool tried = false;

retry:
        err = hmdfs_remote_check_and_reopen(info, filp);
        if (err)
                return err;

        ra = &filp->f_ra;
        /* rtt is measured in units of 10 msecs */
        rtt = hmdfs_tcpi_rtt(info->conn) / 10000;
        switch (rtt) {
        case 0:
                break;
        case 1:
                ra->ra_pages = 256;
                break;
        case 2:
                ra->ra_pages = 512;
                break;
        default:
                ra->ra_pages = 1024;
                break;
        }

        err = generic_file_read_iter(iocb, iter);
        if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) {
                /* We read from a stale fid; retry the read once. */
                tried = true;
                goto retry;
        }

        return err;
}

static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info,
                                            bool check_stash)
{
        return (check_stash && hmdfs_inode_is_stashing(info)) ||
               !hmdfs_is_node_online(info->conn);
}

static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb,
                                              struct iov_iter *iter,
                                              bool check_stash)
{
        struct file *filp = iocb->ki_filp;
        struct inode *inode = file_inode(filp);
        struct hmdfs_inode_info *info = hmdfs_i(inode);
        ssize_t ret;

        if (hmdfs_is_file_unwritable(info, check_stash))
                return -EAGAIN;

        ret = hmdfs_remote_check_and_reopen(info, filp);
        if (ret)
                return ret;

        inode_lock(inode);
        if (hmdfs_is_file_unwritable(info, check_stash)) {
                ret = -EAGAIN;
                goto out;
        }
        ret = generic_write_checks(iocb, iter);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, iter);
out:
        inode_unlock(inode);

        if (ret > 0)
                ret = generic_write_sync(iocb, ret);
        return ret;
}

ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb,
                                             struct iov_iter *iter)
{
        return __hmdfs_file_write_iter_remote(iocb, iter, false);
}

static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb,
                                            struct iov_iter *iter)
{
        return __hmdfs_file_write_iter_remote(iocb, iter, true);
}

/* hmdfs does not support mmap writes to remote files */
static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf)
{
        return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct hmdfs_file_vm_ops = {
        .fault = filemap_fault,
        .map_pages = filemap_map_pages,
        .page_mkwrite = hmdfs_page_mkwrite,
};

static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_ops = &hmdfs_file_vm_ops;
        file_accessed(file);

        return 0;
}
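/*
 * Write back local dirty pages first, then ask the remote side to fsync
 * the backing file for the given range.
 */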
static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end,
                                   int datasync)
{
        struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
        struct hmdfs_peer *conn = info->conn;
        struct hmdfs_fid fid;
        int err;

        trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id,
                                       info->remote_ino, datasync);
        /*
         * Continue regardless of whether the file reopen fails or not,
         * because there may be no dirty pages at all.
         */
        hmdfs_remote_check_and_reopen(info, file);

        filemap_fdatawrite(file->f_mapping);
        down_write(&info->wpage_sem);
        err = file_write_and_wait_range(file, start, end);
        up_write(&info->wpage_sem);
        if (err) {
                hmdfs_err("local fsync fail with %d", err);
                goto out;
        }

        hmdfs_remote_fetch_fid(info, &fid);
        err = hmdfs_send_fsync(conn, &fid, start, end, datasync);
        if (err)
                hmdfs_err("send fsync fail with %d", err);

out:
        trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id,
                                      info->remote_ino,
                                      get_cmd_timeout(conn->sbi, F_FSYNC), err);

        /* Compatible with POSIX retcode */
        if (err == -ETIME)
                err = -EIO;

        return err;
}

const struct file_operations hmdfs_dev_file_fops_remote = {
        .owner = THIS_MODULE,
        .llseek = generic_file_llseek,
        .read_iter = hmdfs_file_read_iter_remote,
        .write_iter = hmdfs_file_write_iter_remote,
        .mmap = hmdfs_file_mmap_remote,
        .open = hmdfs_file_open_remote,
        .release = hmdfs_file_release_remote,
        .flush = hmdfs_file_flush,
        .fsync = hmdfs_file_fsync_remote,
        .splice_read = copy_splice_read,
        .splice_write = iter_file_splice_write,
};

static void hmdfs_fill_page_zero(struct page *page)
{
        void *addr = NULL;

        addr = kmap(page);
        memset(addr, 0, PAGE_SIZE);
        kunmap(page);
        SetPageUptodate(page);
        unlock_page(page);
}

static int hmdfs_readpage_remote(struct file *file, struct page *page)
{
        struct inode *inode = file_inode(file);
        struct hmdfs_inode_info *info = hmdfs_i(inode);
        loff_t isize = i_size_read(inode);
        pgoff_t end_index = (isize - 1) >> PAGE_SHIFT;
        struct hmdfs_fid fid;

        if (!isize || page->index > end_index) {
                hmdfs_fill_page_zero(page);
                return 0;
        }

        hmdfs_remote_fetch_fid(info, &fid);
        return hmdfs_client_readpage(info->conn, &fid, page);
}

static int hmdfs_read_folio(struct file *file, struct folio *folio)
{
        struct page *page = &folio->page;

        return hmdfs_readpage_remote(file, page);
}

uint32_t hmdfs_get_writecount(struct page *page)
{
        uint32_t count = 0;
        loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET;
        struct inode *inode = page->mapping->host;
        loff_t size = i_size_read(inode);

        /*
         * If the page offset is greater than i_size, which is possible when
         * writepage races with truncate, we don't need to do a remote
         * writepage since the page will be truncated after it is unlocked.
         */
        if (pos >= size)
                count = 0;
        /*
         * If the page about to be written extends beyond i_size, we can't
         * write past i_size because the remote file size would be wrong.
         */
        else if (size < pos + HMDFS_PAGE_SIZE)
                count = size - pos;
        /* It's safe to write the whole page */
        else
                count = HMDFS_PAGE_SIZE;

        return count;
}
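/*
 * Decide whether the current thread may issue a remote writepage.
 * WB_SYNC_ALL writeback proceeds without taking wpage_sem (the flush/fsync
 * paths in this file already hold it for write), while WB_SYNC_NONE
 * writeback only proceeds if the read lock can be taken without blocking.
 */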
static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info,
                                   bool *rsem_held, bool sync_all)
{
        WARN_ON(!rsem_held);

        if (sync_all) {
                *rsem_held = false;
                return true;
        }
        *rsem_held = down_read_trylock(&info->wpage_sem);
        return *rsem_held;
}

/**
 * hmdfs_writepage_remote - write back a dirty page to the remote
 *
 * INFO:
 * When asked to do WB_SYNC_ALL, this function should leave with both the page
 * and the radix tree node clean to achieve close-to-open consistency.
 * Moreover, it shall never return -EIO, to help filemap iterate over all
 * dirty pages.
 *
 * INFO:
 * When asked to do WB_SYNC_NONE, this function should be forgiving if a fault
 * (OOM or a bad pipe) happens, to enable subsequent r/w & wb.
 */
static int hmdfs_writepage_remote(struct page *page,
                                  struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct hmdfs_inode_info *info = hmdfs_i(inode);
        struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb);
        int ret = 0;
        bool rsem_held = false;
        bool sync = wbc->sync_mode == WB_SYNC_ALL;
        struct hmdfs_writepage_context *param = NULL;

        if (!allow_cur_thread_wpage(info, &rsem_held, sync))
                goto out_unlock;

        set_page_writeback(page);

        param = kzalloc(sizeof(*param), GFP_NOFS);
        if (!param) {
                ret = -ENOMEM;
                goto out_endwb;
        }

        if (sync && hmdfs_usr_sig_pending(current)) {
                ClearPageUptodate(page);
                goto out_free;
        }

        param->count = hmdfs_get_writecount(page);
        if (!param->count)
                goto out_free;

        param->rsem_held = rsem_held;
        hmdfs_remote_fetch_fid(info, &param->fid);
        param->sync_all = sync;
        param->caller = current;
        get_task_struct(current);
        param->page = page;
        param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms);
        INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry);
        ret = hmdfs_remote_do_writepage(info->conn, param);
        if (likely(!ret))
                return 0;

        put_task_struct(current);
out_free:
        kfree(param);
out_endwb:
        end_page_writeback(page);
        if (rsem_held)
                up_read(&info->wpage_sem);
out_unlock:
        if (sync || !hmdfs_need_redirty_page(info, ret)) {
                SetPageError(page);
                mapping_set_error(page->mapping, ret);
        } else {
                redirty_page_for_writepage(wbc, page);
        }
        unlock_page(page);

        return ret;
}

static void hmdfs_account_dirty_pages(struct address_space *mapping)
{
        struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;

        if (!sbi->h_wb->dirty_writeback_control)
                return;

        this_cpu_inc(*sbi->h_wb->bdp_ratelimits);
}
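/*
 * Prepare a page for a write. A page that will be fully overwritten (or is
 * already uptodate) needs no read; a write starting at the page boundary
 * and reaching at least EOF only needs the tail of the page zeroed;
 * otherwise the page has to be read from the remote first.
 */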
static int hmdfs_write_begin_remote(struct file *file,
                                    struct address_space *mapping, loff_t pos,
                                    unsigned int len, struct page **pagep,
                                    void **fsdata)
{
        pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT;
        struct inode *inode = file_inode(file);
        struct page *page = NULL;
        int ret = 0;

start:
        page = grab_cache_page_write_begin(mapping, index);
        if (!page)
                return -ENOMEM;
        *pagep = page;
        wait_on_page_writeback(page);

        // If this page will be covered completely.
        if (len == HMDFS_PAGE_SIZE || PageUptodate(page))
                return 0;

        /*
         * If the data that exists in this page will be completely covered,
         * we just need to clear the rest of the page.
         */
        if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) &&
            (pos + len) >= i_size_read(inode)) {
                zero_user_segment(page, len, HMDFS_PAGE_SIZE);
                return 0;
        }

        /*
         * We need to read the page from the remote before writing data
         * to it.
         */
        ret = hmdfs_readpage_remote(file, page);
        if (!ret) {
                if (PageLocked(page)) {
                        ret = folio_lock_killable(page_folio(page));
                        if (!ret)
                                unlock_page(page);
                }

                if (!ret && PageUptodate(page)) {
                        put_page(page);
                        goto start;
                }
                if (!ret)
                        ret = -EIO;
        }
        put_page(page);
        return ret;
}

static int hmdfs_write_end_remote(struct file *file,
                                  struct address_space *mapping, loff_t pos,
                                  unsigned int len, unsigned int copied,
                                  struct page *page, void *fsdata)
{
        struct inode *inode = page->mapping->host;

        if (!PageUptodate(page)) {
                if (unlikely(copied != len))
                        copied = 0;
                else
                        SetPageUptodate(page);
        }
        if (!copied)
                goto unlock_out;

        if (!PageDirty(page)) {
                hmdfs_account_dirty_pages(mapping);
                set_page_dirty(page);
        }

        if (pos + copied > i_size_read(inode)) {
                i_size_write(inode, pos + copied);
                hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
        }
unlock_out:
        unlock_page(page);
        put_page(page);

        /* hmdfs private writeback control */
        hmdfs_balance_dirty_pages_ratelimited(mapping);

        return copied;
}

const struct address_space_operations hmdfs_dev_file_aops_remote = {
        .read_folio = hmdfs_read_folio,
        .write_begin = hmdfs_write_begin_remote,
        .write_end = hmdfs_write_end_remote,
        .writepage = hmdfs_writepage_remote,
        .dirty_folio = filemap_dirty_folio,
};
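/*
 * Encode a directory position as [remote flag][device id][group id][offset],
 * following the POS_BIT_NUM/DEV_ID_BIT_NUM/OFFSET_BIT_NUM layout. The top
 * bit is set for remote (non-local) positions.
 */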
loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id,
                     unsigned long offset)
{
        loff_t pos;

        pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) +
              ((loff_t)group_id << OFFSET_BIT_NUM) + offset;
        if (dev_id)
                pos |= ((loff_t)1 << (POS_BIT_NUM - 1));
        return pos;
}

int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi,
                                  struct file *file,
                                  struct file *handler,
                                  struct dir_context *ctx)
{
        struct hmdfs_dentry_group *dentry_group = NULL;
        loff_t pos = ctx->pos;
        unsigned long dev_id = (unsigned long)((pos << 1) >>
                                               (POS_BIT_NUM - DEV_ID_BIT_NUM));
        unsigned long group_id =
                (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >>
                                (POS_BIT_NUM - GROUP_ID_BIT_NUM));
        loff_t offset = pos & OFFSET_BIT_MASK;
        int group_num = 0;
        char *dentry_name = NULL;
        int iterate_result = 0;
        int i, j;

        dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
        if (!dentry_group)
                return -ENOMEM;

        if (IS_ERR_OR_NULL(handler)) {
                kfree(dentry_group);
                return -ENOENT;
        }

        group_num = get_dentry_group_cnt(file_inode(handler));
        dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
        if (!dentry_name) {
                kfree(dentry_group);
                return -ENOMEM;
        }

        for (i = group_id; i < group_num; i++) {
                int ret = hmdfs_metainfo_read(sbi, handler, dentry_group,
                                              sizeof(struct hmdfs_dentry_group),
                                              i);
                if (ret != sizeof(struct hmdfs_dentry_group)) {
                        hmdfs_err("read dentry group failed ret:%d", ret);
                        goto done;
                }

                for (j = offset; j < DENTRY_PER_GROUP; j++) {
                        int len;
                        int file_type = DT_UNKNOWN;
                        bool is_continue;

                        len = le16_to_cpu(dentry_group->nsl[j].namelen);
                        if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
                                continue;

                        memset(dentry_name, 0, DENTRY_NAME_MAX_LEN);
                        // TODO: Support more file_type
                        if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
                                file_type = DT_DIR;
                        else if (S_ISREG(le16_to_cpu(
                                         dentry_group->nsl[j].i_mode)))
                                file_type = DT_REG;
                        else if (S_ISLNK(le16_to_cpu(
                                         dentry_group->nsl[j].i_mode)))
                                file_type = DT_LNK;

                        strncat(dentry_name, dentry_group->filename[j], len);
                        pos = hmdfs_set_pos(dev_id, i, j);
                        is_continue =
                                dir_emit(ctx, dentry_name, len,
                                         pos + INUNUMBER_START, file_type);
                        if (!is_continue) {
                                ctx->pos = pos;
                                iterate_result = 1;
                                goto done;
                        }
                }
                offset = 0;
        }

done:
        kfree(dentry_name);
        kfree(dentry_group);
        return iterate_result;
}

int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
                               struct dir_context *ctx)
{
        int iterate_result = 0;

        iterate_result = analysis_dentry_file_from_con(
                con->sbi, file, file->private_data, ctx);

        return iterate_result;
}
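/*
 * Remote directory entries are not fetched per readdir call; they are
 * emitted from the dentry cache file that hmdfs_dir_open_remote() stored in
 * file->private_data.
 */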
static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx)
{
        int err = 0;
        loff_t start_pos = ctx->pos;
        struct hmdfs_peer *con = NULL;
        struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry);
        bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1));
        uint64_t dev_id = di->device_id;

        if (ctx->pos == -1)
                return 0;
        if (is_local)
                ctx->pos = hmdfs_set_pos(dev_id, 0, 0);

        con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id);
        if (con) {
                // ctx->pos = 0;
                err = hmdfs_dev_readdir_from_con(con, file, ctx);
                peer_put(con);
                if (err)
                        goto done;
        }

done:
        if (err <= 0)
                ctx->pos = -1;

        trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos,
                                   err);
        return err;
}

int hmdfs_dir_open_remote(struct inode *inode, struct file *file)
{
        struct hmdfs_inode_info *info = hmdfs_i(inode);
        struct clearcache_item *cache_item = NULL;

        if (info->conn) {
                if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time),
                                            info->conn->device_id,
                                            file->f_path.dentry))
                        get_remote_dentry_file_sync(file->f_path.dentry,
                                                    info->conn);
                cache_item = hmdfs_find_cache_item(info->conn->device_id,
                                                   file->f_path.dentry);
                if (cache_item) {
                        file->private_data = cache_item->filp;
                        get_file(file->private_data);
                        kref_put(&cache_item->ref, release_cache_item);
                        return 0;
                }
                return -ENOENT;
        }
        return -ENOENT;
}

static int hmdfs_dir_release_remote(struct inode *inode, struct file *file)
{
        if (file->private_data)
                fput(file->private_data);
        file->private_data = NULL;
        return 0;
}

const struct file_operations hmdfs_dev_dir_ops_remote = {
        .owner = THIS_MODULE,
        .iterate_shared = hmdfs_iterate_remote,
        .open = hmdfs_dir_open_remote,
        .release = hmdfs_dir_release_remote,
        .fsync = __generic_file_fsync,
};