1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/hmdfs/stash.c
4 *
5 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
6 */
7
8 #include <linux/kernel.h>
9 #include <linux/fs.h>
10 #include <linux/file.h>
11 #include <linux/dcache.h>
12 #include <linux/namei.h>
13 #include <linux/mount.h>
14 #include <linux/slab.h>
15 #include <linux/list.h>
16 #include <linux/pagemap.h>
17 #include <linux/sched/mm.h>
18 #include <linux/sched/task.h>
19 #include <linux/errseq.h>
20 #include <linux/crc32.h>
21
22 #include "stash.h"
23 #include "comm/node_cb.h"
24 #include "comm/protocol.h"
25 #include "comm/connection.h"
26 #include "file_remote.h"
27 #include "hmdfs_dentryfile.h"
28 #include "authority/authentication.h"
29
30 /* Head magic used to identify a stash file */
31 #define HMDFS_STASH_FILE_HEAD_MAGIC 0xF7AB06C3
32 /* Head and path in stash file are aligned with HMDFS_STASH_BLK_SIZE */
33 #define HMDFS_STASH_BLK_SIZE 4096
34 #define HMDFS_STASH_BLK_SHIFT 12
35 #define HMDFS_STASH_PAGE_TO_SECTOR_SHIFT 3
36 #define HMDFS_STASH_DIR_NAME "stash"
37 #define HMDFS_STASH_FMT_DIR_NAME "v1"
38 #define HMDFS_STASH_WORK_DIR_NAME \
39 (HMDFS_STASH_DIR_NAME "/" HMDFS_STASH_FMT_DIR_NAME)
40
41 #define HMDFS_STASH_FILE_NAME_LEN 20
42
43 #define HMDFS_STASH_FLUSH_CNT 2
44
45 #define HMDFS_STASH_PATH_LEN (HMDFS_CID_SIZE + HMDFS_STASH_FILE_NAME_LEN + 1)
46
/*
 * On-disk head of a stash file; all fields are little-endian.
 * Written at offset 0 of the stash file; the path and data regions
 * that follow are placed at path_offs/data_offs, expressed in
 * HMDFS_STASH_BLK_SIZE (4KB) blocks.
 */
struct hmdfs_cache_file_head {
	__le32 magic;		/* HMDFS_STASH_FILE_HEAD_MAGIC */
	__le32 crc_offset;	/* byte offset of crc32 within this head */
	__le64 ino;		/* remote inode number */
	__le64 size;		/* stashed file size in bytes */
	__le64 blocks;		/* written pages as 512-byte sectors */
	__le64 last_write_pos;	/* NOTE(review): not set in visible code — confirm writer */
	__le64 ctime;		/* NOTE(review): not set in visible code */
	__le32 ctime_nsec;
	__le32 change_detect_cap;
	__le64 ichange_count;
	__le32 path_offs;	/* path region offset, in 4KB blocks */
	__le32 path_len;	/* path length including trailing NUL */
	__le32 path_cnt;	/* number of paths stored (currently 1) */
	__le32 data_offs;	/* data region offset, in 4KB blocks */
	/* Attention: expand new fields in here to compatible with old ver */
	__le32 crc32;		/* crc32 over bytes [0, crc_offset) */
} __packed;
65
/* On-stack context for running stash-cache init on the system workqueue. */
struct hmdfs_stash_work {
	struct hmdfs_peer *conn;	/* peer whose inodes are being stashed */
	struct list_head *list;		/* inodes awaiting cache initialization */
	struct work_struct work;
	struct completion done;		/* completed when the work item finishes */
};
72
73 struct hmdfs_inode_tbl {
74 unsigned int cnt;
75 unsigned int max;
76 uint64_t inodes[0];
77 };
78
/* dir_context wrapper used while iterating a peer's stash directory. */
struct hmdfs_stash_dir_context {
	struct dir_context dctx;
	char name[NAME_MAX + 1];	/* NUL-terminated copy of the current entry name */
	struct hmdfs_inode_tbl *tbl;	/* inode numbers collected so far */
};
84
/* Counters accumulated while restoring a peer's stash files. */
struct hmdfs_restore_stats {
	unsigned int succeed;
	unsigned int fail;
	unsigned int keep;	/* files kept on disk for a later retry (ctx->keep) */
	unsigned long long ok_pages;
	unsigned long long fail_pages;
};
92
/* Counters for one stash round; filled by hmdfs_update_stash_stats(). */
struct hmdfs_stash_stats {
	unsigned int succeed;		/* inodes stashed (err > 0) */
	unsigned int donothing;		/* inodes with nothing to stash (err == 0) */
	unsigned int fail;		/* inodes whose stash failed (err < 0) */
	unsigned long long ok_pages;
	unsigned long long fail_pages;
};
100
/* State threaded through the restore of a single stash file. */
struct hmdfs_file_restore_ctx {
	struct hmdfs_peer *conn;
	struct path src_dir_path;	/* peer's stash directory */
	struct path dst_root_path;
	char *dst;			/* NOTE(review): presumably destination path buffer — confirm */
	char *page;			/* NOTE(review): presumably per-page copy buffer — confirm */
	struct file *src_filp;		/* opened stash file being verified/restored */
	uint64_t inum;			/* remote inode number parsed from the file name */
	uint64_t pages;
	unsigned int seq;		/* node event seq, for offline detection */
	unsigned int data_offs;		/* data region offset from the file head */
	/* output */
	bool keep;			/* true: keep the stash file for a later retry */
};
115
/* Immutable arguments for copying stash data between two files. */
struct hmdfs_copy_args {
	struct file *src;	/* file read from */
	struct file *dst;	/* file written to */
	void *buf;		/* transfer buffer */
	size_t buf_len;		/* size of @buf in bytes */
	unsigned int seq;	/* node event seq, for offline detection */
	unsigned int data_offs;	/* data region offset, in 4KB blocks */
	uint64_t inum;		/* remote inode number, for logging */
};
125
/* Per-iteration state of a stash data copy. */
struct hmdfs_copy_ctx {
	struct hmdfs_copy_args args;
	loff_t src_pos;		/* current read offset in args.src */
	loff_t dst_pos;		/* current write offset in args.dst */
	/* output */
	size_t copied;		/* bytes transferred in the last step */
	bool eof;		/* true once the source is exhausted */
};
134
/* Counters accumulated while rebuilding the stashed-inode list. */
struct hmdfs_rebuild_stats {
	unsigned int succeed;
	unsigned int total;
	unsigned int fail;
	unsigned int invalid;	/* entries rejected (e.g. bad head/magic) */
};
141
/* On-stack context for running a stash check on the system workqueue. */
struct hmdfs_check_work {
	struct hmdfs_peer *conn;	/* peer being checked */
	struct work_struct work;
	struct completion done;		/* completed when the work item finishes */
};
147
/*
 * Operation applied to one peer's stash directory.
 * Parameters: peer, node event seq, stash dir path, table of stashed
 * inode numbers, per-operation private data.
 * NOTE(review): return presumed 0 / -errno — confirm at call sites.
 */
typedef int (*stash_operation_func)(struct hmdfs_peer *,
				    unsigned int,
				    struct path *,
				    const struct hmdfs_inode_tbl *,
				    void *);
153
/*
 * Create directory @name under @parent, or reuse it when it already
 * exists as a directory. Returns the held child dentry on success
 * (caller must dput()), or ERR_PTR: -EINVAL if the name exists but is
 * not a directory, otherwise the lookup/mkdir error.
 */
static struct dentry *hmdfs_do_vfs_mkdir(struct dentry *parent,
					 const char *name, int namelen,
					 umode_t mode)
{
	struct inode *dir = d_inode(parent);
	struct dentry *child = NULL;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

	child = lookup_one_len(name, parent, namelen);
	if (IS_ERR(child))
		goto out;

	if (d_is_positive(child)) {
		/* Already a directory: reuse it as-is */
		if (d_can_lookup(child))
			goto out;

		/* Name taken by a non-directory: refuse */
		dput(child);
		child = ERR_PTR(-EINVAL);
		goto out;
	}

	err = vfs_mkdir(&nop_mnt_idmap, dir, child, mode);
	if (err) {
		dput(child);
		child = ERR_PTR(err);
		goto out;
	}

out:
	inode_unlock(dir);
	return child;
}
188
hmdfs_stash_new_work_dir(struct dentry *parent)189 struct dentry *hmdfs_stash_new_work_dir(struct dentry *parent)
190 {
191 struct dentry *base = NULL;
192 struct dentry *work = NULL;
193
194 base = hmdfs_do_vfs_mkdir(parent, HMDFS_STASH_DIR_NAME,
195 strlen(HMDFS_STASH_DIR_NAME), 0700);
196 if (IS_ERR(base))
197 return base;
198
199 work = hmdfs_do_vfs_mkdir(base, HMDFS_STASH_FMT_DIR_NAME,
200 strlen(HMDFS_STASH_FMT_DIR_NAME), 0700);
201 dput(base);
202
203 return work;
204 }
205
/*
 * Open an anonymous (O_TMPFILE-style) stash file under
 * <stash_work_dir>/<cid>, creating the per-peer directory on demand.
 * The file stays invisible until hmdfs_enable_stash_file() links it.
 * Returns the open file or ERR_PTR on failure.
 */
static struct file *hmdfs_new_stash_file(struct path *d_path, const char *cid)
{
	struct dentry *parent = NULL;
	struct file *filp = NULL;
	struct path stash;
	int err;

	parent = hmdfs_do_vfs_mkdir(d_path->dentry, cid, strlen(cid), 0700);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		hmdfs_err("mkdir error %d", err);
		goto mkdir_err;
	}

	stash.mnt = d_path->mnt;
	stash.dentry = parent;
	filp = kernel_tmpfile_open(&nop_mnt_idmap, &stash, S_IFREG | 0600,
				   O_LARGEFILE | O_WRONLY, current_cred());
	if (IS_ERR(filp)) {
		err = PTR_ERR(filp);
		hmdfs_err("open stash file error %d", err);
		goto open_err;
	}

	/* The open file pins what it needs; drop our dir reference */
	dput(parent);

	return filp;

open_err:
	dput(parent);
mkdir_err:
	return ERR_PTR(err);
}
239
hmdfs_is_dir(struct dentry *child)240 static inline bool hmdfs_is_dir(struct dentry *child)
241 {
242 return d_is_positive(child) && d_can_lookup(child);
243 }
244
hmdfs_is_reg(struct dentry *child)245 static inline bool hmdfs_is_reg(struct dentry *child)
246 {
247 return d_is_positive(child) && d_is_reg(child);
248 }
249
/*
 * Fill @head (the on-disk stash file header) for remote inode @ino from
 * @cache. Fields not set here stay zero from the memset. The CRC covers
 * every byte before the crc32 field itself.
 */
static void hmdfs_set_stash_file_head(const struct hmdfs_cache_info *cache,
				      uint64_t ino,
				      struct hmdfs_cache_file_head *head)
{
	unsigned int crc_pos = offsetof(struct hmdfs_cache_file_head, crc32);
	long long sectors;

	memset(head, 0, sizeof(*head));

	head->magic = cpu_to_le32(HMDFS_STASH_FILE_HEAD_MAGIC);
	head->ino = cpu_to_le64(ino);
	head->size = cpu_to_le64(i_size_read(file_inode(cache->cache_file)));
	/* Written pages expressed as 512-byte sectors */
	sectors = atomic64_read(&cache->written_pgs) <<
		  HMDFS_STASH_PAGE_TO_SECTOR_SHIFT;
	head->blocks = cpu_to_le64(sectors);
	head->path_offs = cpu_to_le32(cache->path_offs);
	head->path_len = cpu_to_le32(cache->path_len);
	head->path_cnt = cpu_to_le32(cache->path_cnt);
	head->data_offs = cpu_to_le32(cache->data_offs);

	head->crc_offset = cpu_to_le32(crc_pos);
	head->crc32 = cpu_to_le32(crc32(0, head, crc_pos));
}
272
hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info)273 static int hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info)
274 {
275 struct hmdfs_cache_info *cache = NULL;
276 struct hmdfs_peer *conn = info->conn;
277 struct hmdfs_cache_file_head cache_head;
278 size_t written;
279 loff_t pos;
280 unsigned int head_size;
281
282 /* No metadata if no cache file info */
283 cache = info->cache;
284 if (!cache)
285 return -EINVAL;
286
287 if (strlen(cache->path) == 0) {
288 long long to_write_pgs = atomic64_read(&cache->to_write_pgs);
289
290 /* Nothing to stash. No need to flush meta data. */
291 if (to_write_pgs == 0)
292 return 0;
293
294 hmdfs_err("peer 0x%x:0x%llx inode 0x%llx lost %lld pages due to no path",
295 conn->owner, conn->device_id,
296 info->remote_ino, to_write_pgs);
297 return -EINVAL;
298 }
299
300 hmdfs_set_stash_file_head(cache, info->remote_ino, &cache_head);
301
302 /* Write head */
303 pos = 0;
304 head_size = sizeof(cache_head);
305 written = kernel_write(cache->cache_file, &cache_head, head_size, &pos);
306 if (written != head_size) {
307 hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write head len %u err %zd",
308 conn->owner, conn->device_id, info->remote_ino,
309 head_size, written);
310 return -EIO;
311 }
312 /* Write path */
313 pos = (loff_t)cache->path_offs << HMDFS_STASH_BLK_SHIFT;
314 written = kernel_write(cache->cache_file, cache->path, cache->path_len,
315 &pos);
316 if (written != cache->path_len) {
317 hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write path len %u err %zd",
318 conn->owner, conn->device_id, info->remote_ino,
319 cache->path_len, written);
320 return -EIO;
321 }
322
323 return 0;
324 }
325
/*
 * Mainly from inode_wait_for_writeback(): wait once for an in-progress
 * I_SYNC writeback pass on @info's inode to finish. Deliberately does
 * not loop, so a pass started after the check may still be running when
 * this returns — the caller handles that (see the comment at its call
 * site in hmdfs_flush_stash_file_data()).
 */
static void hmdfs_wait_remote_writeback_once(struct hmdfs_peer *conn,
					     struct hmdfs_inode_info *info)
{
	struct inode *inode = &info->vfs_inode;
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wq_head = NULL;
	bool in_sync = false;

	/* i_state is protected by i_lock */
	spin_lock(&inode->i_lock);
	in_sync = inode->i_state & I_SYNC;
	spin_unlock(&inode->i_lock);

	if (!in_sync)
		return;

	hmdfs_info("peer 0x%x:0x%llx ino 0x%llx wait for wb once",
		   conn->owner, conn->device_id, info->remote_ino);

	wq_head = bit_waitqueue(&inode->i_state, __I_SYNC);
	__wait_on_bit(wq_head, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
}
348
hmdfs_reset_remote_write_err(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)349 static void hmdfs_reset_remote_write_err(struct hmdfs_peer *conn,
350 struct hmdfs_inode_info *info)
351 {
352 struct address_space *mapping = info->vfs_inode.i_mapping;
353 int flags_err;
354 errseq_t old;
355 int wb_err;
356
357 flags_err = filemap_check_errors(mapping);
358
359 old = errseq_sample(&mapping->wb_err);
360 wb_err = errseq_check_and_advance(&mapping->wb_err, &old);
361 if (flags_err || wb_err)
362 hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx wb error %d %d before stash",
363 conn->owner, conn->device_id, info->remote_ino,
364 flags_err, wb_err);
365 }
366
/*
 * Return true when @mapping has neither dirty- nor writeback-tagged
 * pages. The page-cache lock is held across both tag checks so the
 * answer is a single consistent snapshot.
 */
static bool hmdfs_is_mapping_clean(struct address_space *mapping)
{
	bool clean = false;

	/* b93b016313b3b ("page cache: use xa_lock") introduces i_pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
	xa_lock_irq(&mapping->i_pages);
#else
	spin_lock_irq(&mapping->tree_lock);
#endif
	clean = !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
		!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
	xa_unlock_irq(&mapping->i_pages);
#else
	spin_unlock_irq(&mapping->tree_lock);
#endif
	return clean;
}
386
/*
 * Flush all dirty/writeback pages of @info's inode so the stash file
 * captures the final data. Returns 0 on success (including the
 * nothing-dirty fast path) or the filemap flush error.
 */
static int hmdfs_flush_stash_file_data(struct hmdfs_peer *conn,
				       struct hmdfs_inode_info *info)
{
	struct inode *inode = &info->vfs_inode;
	struct address_space *mapping = inode->i_mapping;
	bool all_clean = true;
	int err = 0;
	int i;

	/*
	 * Wait for the completion of write syscall:
	 * lock/unlock acts as a barrier against writers holding i_rwsem.
	 */
	inode_lock(inode);
	inode_unlock(inode);

	all_clean = hmdfs_is_mapping_clean(mapping);
	if (all_clean) {
		hmdfs_reset_remote_write_err(conn, info);
		return 0;
	}

	/*
	 * No-sync_all writeback during offline may have not seen
	 * the setting of stash_status as HMDFS_REMOTE_INODE_STASHING
	 * and will call mapping_set_error() after we just reset
	 * the previous error. So waiting for these writeback once,
	 * and the following writeback will do local write.
	 */
	hmdfs_wait_remote_writeback_once(conn, info);

	/* Need to clear previous error ? */
	hmdfs_reset_remote_write_err(conn, info);

	/*
	 * 1. dirty page: do write back
	 * 2. writeback page: wait for its completion
	 * 3. writeback -> redirty page: do filemap_write_and_wait()
	 *    twice, so 2th writeback should not allow
	 *    writeback -> redirty transition
	 */
	for (i = 0; i < HMDFS_STASH_FLUSH_CNT; i++) {
		err = filemap_write_and_wait(mapping);
		if (err) {
			hmdfs_err("peer 0x%x:0x%llx inode 0x%llx #%d stash flush error %d",
				  conn->owner, conn->device_id,
				  info->remote_ino, i, err);
			return err;
		}
	}

	/* Best-effort diagnostic; not treated as a failure */
	if (!hmdfs_is_mapping_clean(mapping))
		hmdfs_err("peer 0x%x:0x%llx inode 0x%llx is still dirty dt %d wb %d",
			  conn->owner, conn->device_id, info->remote_ino,
			  !!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY),
			  !!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK));

	return 0;
}
443
hmdfs_flush_stash_file(struct hmdfs_inode_info *info)444 static int hmdfs_flush_stash_file(struct hmdfs_inode_info *info)
445 {
446 int err;
447
448 err = hmdfs_flush_stash_file_data(info->conn, info);
449 if (!err)
450 err = hmdfs_flush_stash_file_metadata(info);
451
452 return err;
453 }
454
/*
 * Make the anonymous stash tmpfile visible by hard-linking it to
 * "0x<remote_ino>" in its stash directory. An existing entry with that
 * name is unlinked first; one retry covers the unlink/lookup race,
 * after which -EEXIST is returned. Returns 0 on success or -errno.
 */
static int hmdfs_enable_stash_file(struct hmdfs_inode_info *info,
				   struct dentry *stash)
{
	char name[HMDFS_STASH_FILE_NAME_LEN];
	struct dentry *parent = NULL;
	struct inode *dir = NULL;
	struct dentry *child = NULL;
	int err = 0;
	bool retried = false;

	snprintf(name, sizeof(name), "0x%llx", info->remote_ino);

	parent = lock_parent(stash);
	dir = d_inode(parent);

lookup_again:
	child = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(child)) {
		err = PTR_ERR(child);
		child = NULL;
		hmdfs_err("lookup %s err %d", name, err);
		goto out;
	}

	if (d_is_positive(child)) {
		hmdfs_warning("%s exists (mode 0%o)",
			      name, d_inode(child)->i_mode);

		err = vfs_unlink(&nop_mnt_idmap, dir, child, NULL);
		if (err) {
			hmdfs_err("unlink %s err %d", name, err);
			goto out;
		}
		/* Retry at most once to avoid looping forever */
		if (retried) {
			err = -EEXIST;
			goto out;
		}

		retried = true;
		dput(child);
		goto lookup_again;
	}

	err = vfs_link(stash, &nop_mnt_idmap, dir, child, NULL);
	if (err) {
		hmdfs_err("link stash file to %s err %d", name, err);
		goto out;
	}

out:
	unlock_dir(parent);
	if (child)
		dput(child);

	return err;
}
511
512 /* Return 1 if stash is done, 0 if nothing is stashed */
hmdfs_close_stash_file(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)513 static int hmdfs_close_stash_file(struct hmdfs_peer *conn,
514 struct hmdfs_inode_info *info)
515 {
516 struct file *cache_file = info->cache->cache_file;
517 struct dentry *c_dentry = file_dentry(cache_file);
518 struct inode *c_inode = d_inode(c_dentry);
519 long long to_write_pgs = atomic64_read(&info->cache->to_write_pgs);
520 int err;
521
522 hmdfs_info("peer 0x%x:0x%llx inode 0x%llx stashed bytes %lld pages %lld",
523 conn->owner, conn->device_id, info->remote_ino,
524 i_size_read(c_inode), to_write_pgs);
525
526 if (to_write_pgs == 0)
527 return 0;
528
529 err = vfs_fsync(cache_file, 0);
530 if (!err)
531 err = hmdfs_enable_stash_file(info, c_dentry);
532 else
533 hmdfs_err("fsync stash file err %d", err);
534
535 return err < 0 ? err : 1;
536 }
537
hmdfs_del_file_cache(struct hmdfs_cache_info *cache)538 static void hmdfs_del_file_cache(struct hmdfs_cache_info *cache)
539 {
540 if (!cache)
541 return;
542
543 fput(cache->cache_file);
544 kfree(cache->path_buf);
545 kfree(cache);
546 }
547
/*
 * Allocate and initialize a stash cache descriptor for @info: record the
 * inode's full hmdfs path (empty when no dentry alias remains), compute
 * the 4KB-block offsets of the path and data regions, and open the
 * anonymous stash file. Returns the descriptor or ERR_PTR on failure.
 */
static struct hmdfs_cache_info *
hmdfs_new_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)
{
	struct hmdfs_cache_info *cache = NULL;
	struct dentry *stash_dentry = NULL;
	int err;

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	atomic64_set(&cache->to_write_pgs, 0);
	atomic64_set(&cache->written_pgs, 0);
	cache->path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!cache->path_buf) {
		err = -ENOMEM;
		goto free_cache;
	}

	/* Need to handle "hardlink" ? */
	stash_dentry = d_find_any_alias(&info->vfs_inode);
	if (stash_dentry) {
		/* Needs full path in hmdfs, will be a device-view path */
		cache->path = dentry_path_raw(stash_dentry, cache->path_buf,
					      PATH_MAX);
		dput(stash_dentry);
		if (IS_ERR(cache->path)) {
			err = PTR_ERR(cache->path);
			hmdfs_err("peer 0x%x:0x%llx inode 0x%llx gen path err %d",
				  conn->owner, conn->device_id,
				  info->remote_ino, err);
			goto free_path;
		}
	} else {
		/* Write-opened file was closed before finding dentry */
		hmdfs_info("peer 0x%x:0x%llx inode 0x%llx no dentry found",
			   conn->owner, conn->device_id, info->remote_ino);
		cache->path_buf[0] = '\0';
		cache->path = cache->path_buf;
	}

	cache->path_cnt = 1;
	/* Path length includes the trailing NUL */
	cache->path_len = strlen(cache->path) + 1;
	/* Head, path and data regions are each 4KB-block aligned */
	cache->path_offs = DIV_ROUND_UP(sizeof(struct hmdfs_cache_file_head),
					HMDFS_STASH_BLK_SIZE);
	cache->data_offs = cache->path_offs + DIV_ROUND_UP(cache->path_len,
							   HMDFS_STASH_BLK_SIZE);
	cache->cache_file = hmdfs_new_stash_file(&conn->sbi->stash_work_dir,
						 conn->cid);
	if (IS_ERR(cache->cache_file)) {
		err = PTR_ERR(cache->cache_file);
		goto free_path;
	}

	return cache;

free_path:
	kfree(cache->path_buf);
free_cache:
	kfree(cache);
	return ERR_PTR(err);
}
610
hmdfs_init_stash_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)611 static void hmdfs_init_stash_file_cache(struct hmdfs_peer *conn,
612 struct hmdfs_inode_info *info)
613 {
614 struct hmdfs_cache_info *cache = NULL;
615
616 cache = hmdfs_new_file_cache(conn, info);
617 if (IS_ERR(cache))
618 /*
619 * Continue even creating stash info failed.
620 * We need to ensure there is no dirty pages
621 * after stash completes
622 */
623 cache = NULL;
624
625 /* Make write() returns */
626 spin_lock(&info->stash_lock);
627 info->cache = cache;
628 info->stash_status = HMDFS_REMOTE_INODE_STASHING;
629 spin_unlock(&info->stash_lock);
630 }
631
hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats, const struct hmdfs_cache_info *cache, int err)632 static void hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats,
633 const struct hmdfs_cache_info *cache,
634 int err)
635 {
636 unsigned long long ok_pages, fail_pages;
637
638 if (cache) {
639 ok_pages = err > 0 ? atomic64_read(&cache->written_pgs) : 0;
640 fail_pages = atomic64_read(&cache->to_write_pgs) - ok_pages;
641 stats->ok_pages += ok_pages;
642 stats->fail_pages += fail_pages;
643 }
644
645 if (err > 0)
646 stats->succeed++;
647 else if (!err)
648 stats->donothing++;
649 else
650 stats->fail++;
651 }
652
/* Return 1 if stash is done, 0 if nothing is stashed */
static int hmdfs_stash_remote_inode(struct hmdfs_inode_info *info,
				    struct hmdfs_stash_stats *stats)
{
	struct hmdfs_cache_info *cache = info->cache;
	struct hmdfs_peer *conn = info->conn;
	unsigned int status;
	int err = 0;

	hmdfs_info("stash peer 0x%x:0x%llx ino 0x%llx",
		   conn->owner, conn->device_id, info->remote_ino);

	/* Flush pages + metadata, then link the tmpfile visible */
	err = hmdfs_flush_stash_file(info);
	if (!err)
		err = hmdfs_close_stash_file(conn, info);

	/* Nothing stashed (or failed): force a fresh open on reconnect */
	if (err <= 0)
		set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
	status = err > 0 ? HMDFS_REMOTE_INODE_RESTORING :
			   HMDFS_REMOTE_INODE_NONE;
	spin_lock(&info->stash_lock);
	info->cache = NULL;
	/*
	 * Use smp_store_release() to ensure order between HMDFS_FID_NEED_OPEN
	 * and HMDFS_REMOTE_INODE_NONE.
	 */
	smp_store_release(&info->stash_status, status);
	spin_unlock(&info->stash_lock);

	hmdfs_update_stash_stats(stats, cache, err);
	hmdfs_del_file_cache(cache);

	return err;
}
687
hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn, struct list_head *list)688 static void hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn,
689 struct list_head *list)
690 {
691 const struct cred *old_cred = NULL;
692 struct hmdfs_inode_info *info = NULL;
693
694 /* For file creation under stash_work_dir */
695 old_cred = hmdfs_override_creds(conn->sbi->cred);
696 list_for_each_entry(info, list, stash_node)
697 hmdfs_init_stash_file_cache(conn, info);
698 hmdfs_revert_creds(old_cred);
699 }
700
hmdfs_init_stash_cache_work_fn(struct work_struct *base)701 static void hmdfs_init_stash_cache_work_fn(struct work_struct *base)
702 {
703 struct hmdfs_stash_work *work =
704 container_of(base, struct hmdfs_stash_work, work);
705
706 hmdfs_init_cache_for_stash_files(work->conn, work->list);
707 complete(&work->done);
708 }
709
/*
 * Run hmdfs_init_cache_for_stash_files() on the system workqueue and
 * wait for it, using an on-stack work item + completion.
 * NOTE(review): presumably off-loaded to avoid doing the work in the
 * caller's (event) context — confirm intent.
 */
static void hmdfs_init_cache_for_stash_files_by_work(struct hmdfs_peer *conn,
						     struct list_head *list)
{
	struct hmdfs_stash_work work = {
		.conn = conn,
		.list = list,
		.done = COMPLETION_INITIALIZER_ONSTACK(work.done),
	};

	INIT_WORK_ONSTACK(&work.work, hmdfs_init_stash_cache_work_fn);
	schedule_work(&work.work);
	wait_for_completion(&work.done);
}
723
/*
 * Collect onto @list every writeable-opened inode of @conn whose stash
 * status is NONE, taking a wr-opened count and an inode reference for
 * each so neither close() nor eviction can remove them mid-stash.
 * With @check set, inodes unexpectedly already STASHING are logged.
 */
static void hmdfs_stash_fetch_ready_files(struct hmdfs_peer *conn,
					  bool check, struct list_head *list)
{
	struct hmdfs_inode_info *info = NULL;

	spin_lock(&conn->wr_opened_inode_lock);
	list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
		int status;

		/* Paired with *_release() in hmdfs_reset_stashed_inode() */
		status = smp_load_acquire(&info->stash_status);
		if (status == HMDFS_REMOTE_INODE_NONE) {
			list_add_tail(&info->stash_node, list);
			/*
			 * Prevent close() removing the inode from
			 * writeable-opened inode list
			 */
			hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
			/* Prevent the inode from eviction */
			ihold(&info->vfs_inode);
		} else if (check && status == HMDFS_REMOTE_INODE_STASHING) {
			hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unexpected stash status %d",
				      conn->owner, conn->device_id,
				      info->remote_ino, status);
		}
	}
	spin_unlock(&conn->wr_opened_inode_lock);
}
752
hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt, unsigned int seq)753 static void hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt,
754 unsigned int seq)
755 {
756 LIST_HEAD(preparing);
757
758 if (!hmdfs_is_stash_enabled(conn->sbi))
759 return;
760
761 mutex_lock(&conn->offline_cb_lock);
762
763 hmdfs_stash_fetch_ready_files(conn, true, &preparing);
764
765 if (list_empty(&preparing))
766 goto out;
767
768 hmdfs_init_cache_for_stash_files_by_work(conn, &preparing);
769 out:
770 mutex_unlock(&conn->offline_cb_lock);
771 }
772
/* Add @info to the peer's stashed-inode list and bump the counter. */
static void hmdfs_track_inode_locked(struct hmdfs_peer *conn,
				     struct hmdfs_inode_info *info)
{
	spin_lock(&conn->stashed_inode_lock);
	list_add_tail(&info->stash_node, &conn->stashed_inode_list);
	conn->stashed_inode_nr++;
	spin_unlock(&conn->stashed_inode_lock);
}
781
782 static void
hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats, const struct hmdfs_stash_stats *stats)783 hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats,
784 const struct hmdfs_stash_stats *stats)
785 {
786 stash_stats->cur_ok = stats->succeed;
787 stash_stats->cur_nothing = stats->donothing;
788 stash_stats->cur_fail = stats->fail;
789 stash_stats->total_ok += stats->succeed;
790 stash_stats->total_nothing += stats->donothing;
791 stash_stats->total_fail += stats->fail;
792 stash_stats->ok_pages += stats->ok_pages;
793 stash_stats->fail_pages += stats->fail_pages;
794 }
795
/*
 * Stash every inode on @list. Successfully stashed inodes move to the
 * peer's stashed-inode list (keeping the reference taken in
 * hmdfs_stash_fetch_ready_files()); the rest drop that reference here.
 * The wr-opened count taken at fetch time is released either way.
 */
static void hmdfs_stash_remote_inodes(struct hmdfs_peer *conn,
				      struct list_head *list)
{
	const struct cred *old_cred = NULL;
	struct hmdfs_inode_info *info = NULL;
	struct hmdfs_inode_info *next = NULL;
	struct hmdfs_stash_stats stats;

	/* For file creation, write and relink under stash_work_dir */
	old_cred = hmdfs_override_creds(conn->sbi->cred);

	memset(&stats, 0, sizeof(stats));
	list_for_each_entry_safe(info, next, list, stash_node) {
		int err;

		list_del_init(&info->stash_node);

		err = hmdfs_stash_remote_inode(info, &stats);
		if (err > 0)
			hmdfs_track_inode_locked(conn, info);

		hmdfs_remote_del_wr_opened_inode(conn, info);
		/* Not tracked: release the reference from fetch */
		if (err <= 0)
			iput(&info->vfs_inode);
	}
	hmdfs_revert_creds(old_cred);

	hmdfs_update_peer_stash_stats(&conn->stats.stash, &stats);
	hmdfs_info("peer 0x%x:0x%llx total stashed %u cur ok %u none %u fail %u",
		   conn->owner, conn->device_id, conn->stashed_inode_nr,
		   stats.succeed, stats.donothing, stats.fail);
}
828
/*
 * Offline callback doing the actual stash. Called with seq_lock held;
 * it is dropped while stashing (and re-taken before return) so
 * non-offline sync callbacks are not blocked, with offline_cb_lock
 * providing the serialization against the prepare callback instead.
 */
static void hmdfs_stash_offline_do_stash(struct hmdfs_peer *conn, int evt,
					 unsigned int seq)
{
	struct hmdfs_inode_info *info = NULL;
	LIST_HEAD(preparing);
	LIST_HEAD(stashing);

	if (!hmdfs_is_stash_enabled(conn->sbi))
		return;

	/* release seq_lock to prevent blocking no-offline sync cb */
	mutex_unlock(&conn->seq_lock);
	/* acquire offline_cb_lock to serialized with offline sync cb */
	mutex_lock(&conn->offline_cb_lock);

	/* Catch inodes that missed the prepare phase */
	hmdfs_stash_fetch_ready_files(conn, false, &preparing);
	if (!list_empty(&preparing))
		hmdfs_init_cache_for_stash_files(conn, &preparing);

	spin_lock(&conn->wr_opened_inode_lock);
	list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
		int status = READ_ONCE(info->stash_status);

		if (status == HMDFS_REMOTE_INODE_STASHING)
			list_add_tail(&info->stash_node, &stashing);
	}
	spin_unlock(&conn->wr_opened_inode_lock);

	if (list_empty(&stashing))
		goto unlock;

	hmdfs_stash_remote_inodes(conn, &stashing);

unlock:
	mutex_unlock(&conn->offline_cb_lock);
	mutex_lock(&conn->seq_lock);
}
866
867 static struct hmdfs_inode_info *
hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum)868 hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum)
869 {
870 struct hmdfs_inode_info *info = NULL;
871
872 list_for_each_entry(info, &conn->stashed_inode_list, stash_node) {
873 if (info->remote_ino == inum)
874 return info;
875 }
876
877 return NULL;
878 }
879
hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)880 static void hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn,
881 struct hmdfs_inode_info *info)
882 {
883 list_del_init(&info->stash_node);
884 iput(&info->vfs_inode);
885
886 conn->stashed_inode_nr--;
887 }
888
/*
 * Untrack @info and publish its stash status back to NONE, pinning the
 * inode across the transition so the status store never touches a
 * freed inode.
 */
static void hmdfs_reset_stashed_inode(struct hmdfs_peer *conn,
				      struct hmdfs_inode_info *info)
{
	struct inode *ino = &info->vfs_inode;

	/*
	 * For updating stash_status after iput()
	 * in hmdfs_untrack_stashed_inode()
	 */
	ihold(ino);
	hmdfs_untrack_stashed_inode(conn, info);
	/*
	 * Ensure the order of stash_node and stash_status:
	 * only update stash_status to NONE after removal of
	 * stash_node is completed.
	 * (Paired with smp_load_acquire() in hmdfs_stash_fetch_ready_files().)
	 */
	smp_store_release(&info->stash_status,
			  HMDFS_REMOTE_INODE_NONE);
	iput(ino);
}
909
hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn)910 static void hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn)
911 {
912 struct hmdfs_inode_info *info = NULL;
913 struct hmdfs_inode_info *next = NULL;
914
915 if (list_empty(&conn->stashed_inode_list))
916 return;
917
918 hmdfs_warning("peer 0x%x:0x%llx drop unrestorable file %u",
919 conn->owner, conn->device_id, conn->stashed_inode_nr);
920
921 list_for_each_entry_safe(info, next,
922 &conn->stashed_inode_list, stash_node) {
923 hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unrestorable status %u",
924 conn->owner, conn->device_id, info->remote_ino,
925 READ_ONCE(info->stash_status));
926
927 hmdfs_reset_stashed_inode(conn, info);
928 }
929 }
930
/*
 * Open the per-peer stash directory <stash_work_dir>/<cid> read-only.
 * Returns the open directory file, or ERR_PTR (-ENOENT if absent,
 * -EINVAL if the name exists but is not a directory).
 */
static struct file *hmdfs_open_stash_dir(struct path *d_path, const char *cid)
{
	int err = 0;
	struct dentry *parent = d_path->dentry;
	struct inode *dir = d_inode(parent);
	struct dentry *child = NULL;
	struct path peer_path;
	struct file *filp = NULL;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	child = lookup_one_len(cid, parent, strlen(cid));
	if (!IS_ERR(child)) {
		if (!hmdfs_is_dir(child)) {
			if (d_is_positive(child)) {
				hmdfs_err("invalid stash dir mode 0%o", d_inode(child)->i_mode);
				err = -EINVAL;
			} else {
				err = -ENOENT;
			}
			/* Error paths drop the ref here; success keeps it */
			dput(child);
		}
	} else {
		err = PTR_ERR(child);
		hmdfs_err("lookup stash dir err %d", err);
	}
	inode_unlock(dir);

	if (err)
		return ERR_PTR(err);

	peer_path.mnt = d_path->mnt;
	peer_path.dentry = child;
	filp = dentry_open(&peer_path, O_RDONLY | O_DIRECTORY, current_cred());
	if (IS_ERR(filp))
		hmdfs_err("open err %d", (int)PTR_ERR(filp));

	/* dentry_open() took its own reference */
	dput(child);

	return filp;
}
971
hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl)972 static int hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl)
973 {
974 struct hmdfs_inode_tbl *new = NULL;
975
976 new = kmalloc(PAGE_SIZE, GFP_KERNEL);
977 if (!new)
978 return -ENOMEM;
979
980 new->cnt = 0;
981 new->max = (PAGE_SIZE - offsetof(struct hmdfs_inode_tbl, inodes)) /
982 sizeof(new->inodes[0]);
983 *tbl = new;
984
985 return 0;
986 }
987
/*
 * Parse a stash directory entry name ("0x<ino>" hex) into *stash_inum.
 * Returns 1 on success; 0 when the entry should be skipped (wrong type,
 * overlong name, or unparsable number).
 */
static int hmdfs_parse_stash_file_name(struct dir_context *dctx,
				       const char *name,
				       int namelen,
				       unsigned int d_type,
				       uint64_t *stash_inum)
{
	struct hmdfs_stash_dir_context *sctx =
		container_of(dctx, struct hmdfs_stash_dir_context, dctx);
	int ret;

	/* Only regular files (or unknown-typed entries) can be stash files */
	if (!(d_type == DT_UNKNOWN || d_type == DT_REG))
		return 0;
	if (namelen > NAME_MAX)
		return 0;

	memcpy(sctx->name, name, namelen);
	sctx->name[namelen] = '\0';

	ret = kstrtoull(sctx->name, 16, stash_inum);
	if (ret) {
		hmdfs_err("unexpected stash file err %d", ret);
		return 0;
	}
	return 1;
}
1012
/*
 * iterate_dir() actor: detect whether the directory contains at least
 * one valid stash file. Returns true to keep iterating past invalid
 * entries; on the first valid entry it bumps tbl->cnt and returns false
 * to stop the iteration (tbl->cnt != 0 is the "found" signal).
 */
static bool hmdfs_has_stash_file(struct dir_context *dctx, const char *name,
				 int namelen, loff_t offset,
				 u64 inum, unsigned int d_type)
{
	struct hmdfs_stash_dir_context *ctx = NULL;
	uint64_t stash_inum;
	int err;

	ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx);
	err = hmdfs_parse_stash_file_name(dctx, name, namelen,
					  d_type, &stash_inum);
	if (!err)
		return true;

	ctx->tbl->cnt++;
	return false;
}
1030
/*
 * iterate_dir() actor: collect every valid stash file's inode number
 * into ctx->tbl. Skips unparsable entries (returns true to continue);
 * returns false to stop once the table is full.
 */
static bool hmdfs_fill_stash_file(struct dir_context *dctx, const char *name,
				  int namelen, loff_t offset,
				  u64 inum, unsigned int d_type)
{
	struct hmdfs_stash_dir_context *ctx = NULL;
	uint64_t stash_inum;
	int err;

	ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx);
	err = hmdfs_parse_stash_file_name(dctx, name, namelen,
					  d_type, &stash_inum);
	if (!err)
		return true;
	if (ctx->tbl->cnt >= ctx->tbl->max)
		return false;

	ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum;

	return true;
}
1051
/*
 * Unlink stash file @child from stash directory @parent.
 *
 * An extra reference on @child is held across the unlink so that
 * d_delete() does not drop the last reference and tear down the inode
 * while the dentry is still in use here.
 *
 * Returns 0 on success or the negative error from vfs_unlink().
 */
static int hmdfs_del_stash_file(struct dentry *parent, struct dentry *child)
{
	struct inode *dir = d_inode(parent);
	int err = 0;

	/* Prevent d_delete() from calling dentry_unlink_inode() */
	dget(child);

	/* Parent directory must be locked for unlink */
	inode_lock_nested(dir, I_MUTEX_PARENT);
	err = vfs_unlink(&nop_mnt_idmap, dir, child, NULL);
	if (err)
		hmdfs_err("remove stash file err %d", err);
	inode_unlock(dir);

	dput(child);

	return err;
}
1070
/*
 * Return true if the peer's event sequence has moved past @seq, i.e.
 * the node state changed (went offline) after the caller sampled the
 * sequence number.
 */
static inline bool hmdfs_is_node_offlined(const struct hmdfs_peer *conn,
					  unsigned int seq)
{
	/*
	 * open()/fsync() may fail due to "status = NODE_STAT_OFFLINE"
	 * in hmdfs_disconnect_node().
	 * Pair with smp_mb() in hmdfs_disconnect_node() to ensure
	 * getting the newest event sequence.
	 */
	smp_mb__before_atomic();
	return hmdfs_node_evt_seq(conn) != seq;
}
1083
/*
 * Validate the fixed head of a stash file before restoring from it.
 *
 * Checks, in order: head magic; CRC32 over the head up to crc_offset
 * (the caller has already bounds-checked crc_offset against the head
 * size); the stored inode number against ctx->inum; that the path and
 * data areas lie within the stash file, with the path area strictly
 * before the data area; that the recorded size matches the stash
 * file's actual size; and that at least one path is recorded.
 *
 * Returns 0 when the head is consistent, -EUCLEAN on any mismatch.
 */
static int hmdfs_verify_restore_file_head(struct hmdfs_file_restore_ctx *ctx,
					  const struct hmdfs_cache_file_head *head)
{
	struct inode *inode = file_inode(ctx->src_filp);
	struct hmdfs_peer *conn = ctx->conn;
	unsigned int crc, read_crc, crc_offset;
	loff_t path_offs, data_offs, isize;
	int err = 0;

	if (le32_to_cpu(head->magic) != HMDFS_STASH_FILE_HEAD_MAGIC) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid magic: got 0x%x, exp 0x%x",
			  conn->owner, conn->device_id, ctx->inum,
			  le32_to_cpu(head->magic),
			  HMDFS_STASH_FILE_HEAD_MAGIC);
		goto out;
	}

	/* CRC is stored right after the covered region */
	crc_offset = le32_to_cpu(head->crc_offset);
	read_crc = le32_to_cpu(*((__le32 *)((char *)head + crc_offset)));
	crc = crc32(0, head, crc_offset);
	if (read_crc != crc) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid crc: got 0x%x, exp 0x%x",
			  conn->owner, conn->device_id, ctx->inum,
			  read_crc, crc);
		goto out;
	}

	if (le64_to_cpu(head->ino) != ctx->inum) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid ino: got %llu, exp %llu",
			  conn->owner, conn->device_id, ctx->inum,
			  le64_to_cpu(head->ino), ctx->inum);
		goto out;
	}

	/* Offsets are stored in units of HMDFS_STASH_BLK_SIZE blocks */
	path_offs = (loff_t)le32_to_cpu(head->path_offs) <<
		    HMDFS_STASH_BLK_SHIFT;
	if (path_offs <= 0 || path_offs >= i_size_read(inode)) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_offs %d, stash file size %llu",
			  conn->owner, conn->device_id, ctx->inum,
			  le32_to_cpu(head->path_offs), i_size_read(inode));
		goto out;
	}

	data_offs = (loff_t)le32_to_cpu(head->data_offs) <<
		    HMDFS_STASH_BLK_SHIFT;
	/* Path area must precede the data area */
	if (path_offs >= data_offs) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, path_offs %d",
			  conn->owner, conn->device_id, ctx->inum,
			  le32_to_cpu(head->data_offs),
			  le32_to_cpu(head->path_offs));
		goto out;
	}
	if (data_offs <= 0 || data_offs >= i_size_read(inode)) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, stash file size %llu",
			  conn->owner, conn->device_id, ctx->inum,
			  le32_to_cpu(head->data_offs), i_size_read(inode));
		goto out;
	}

	isize = le64_to_cpu(head->size);
	if (isize != i_size_read(inode)) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid isize: got %llu, exp %llu",
			  conn->owner, conn->device_id, ctx->inum,
			  le64_to_cpu(head->size), i_size_read(inode));
		goto out;
	}

	if (le32_to_cpu(head->path_cnt) < 1) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_cnt %d",
			  conn->owner, conn->device_id, ctx->inum,
			  le32_to_cpu(head->path_cnt));
		goto out;
	}

out:
	return err;
}
1169
/*
 * Read and validate the stash file's metadata.
 *
 * The head is read in two passes: first only up to and including
 * crc_offset (so crc_offset can be bounds-checked before trusting it),
 * then the full CRC-covered head.  After verification, fills in
 * ctx->pages and ctx->data_offs and reads the stored destination path
 * into ctx->dst (a PATH_MAX buffer), verifying NUL termination.
 *
 * Returns 0 on success, a negative errno on read failure or -EUCLEAN
 * on corrupt metadata.
 */
static int hmdfs_get_restore_file_metadata(struct hmdfs_file_restore_ctx *ctx)
{
	struct hmdfs_cache_file_head head;
	struct hmdfs_peer *conn = ctx->conn;
	unsigned int head_size, read_size, head_crc_offset;
	loff_t pos;
	ssize_t rd;
	int err = 0;

	head_size = sizeof(struct hmdfs_cache_file_head);
	memset(&head, 0, head_size);
	/* Read part head */
	pos = 0;
	read_size = offsetof(struct hmdfs_cache_file_head, crc_offset) +
		    sizeof(head.crc_offset);
	rd = kernel_read(ctx->src_filp, &head, read_size, &pos);
	if (rd != read_size) {
		err = rd < 0 ? rd : -ENODATA;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read part head err %d",
			  conn->owner, conn->device_id, ctx->inum, err);
		goto out;
	}
	/* First addend guards against unsigned wrap-around */
	head_crc_offset = le32_to_cpu(head.crc_offset);
	if (head_crc_offset + sizeof(head.crc32) < head_crc_offset ||
	    head_crc_offset + sizeof(head.crc32) > head_size) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx got bad head: Too long crc_offset %u which exceeds head size %u",
			  conn->owner, conn->device_id, ctx->inum,
			  head_crc_offset, head_size);
		goto out;
	}

	/* Read full head */
	pos = 0;
	read_size = le32_to_cpu(head.crc_offset) + sizeof(head.crc32);
	rd = kernel_read(ctx->src_filp, &head, read_size, &pos);
	if (rd != read_size) {
		err = rd < 0 ? rd : -ENODATA;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read full head err %d",
			  conn->owner, conn->device_id, ctx->inum, err);
		goto out;
	}

	err = hmdfs_verify_restore_file_head(ctx, &head);
	if (err)
		goto out;

	/* blocks are 512-byte sectors; convert to page count */
	ctx->pages = le64_to_cpu(head.blocks) >>
		     HMDFS_STASH_PAGE_TO_SECTOR_SHIFT;
	ctx->data_offs = le32_to_cpu(head.data_offs);
	/* Read path */
	read_size = min_t(unsigned int, le32_to_cpu(head.path_len), PATH_MAX);
	pos = (loff_t)le32_to_cpu(head.path_offs) << HMDFS_STASH_BLK_SHIFT;
	rd = kernel_read(ctx->src_filp, ctx->dst, read_size, &pos);
	if (rd != read_size) {
		err = rd < 0 ? rd : -ENODATA;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path err %d",
			  conn->owner, conn->device_id, ctx->inum, err);
		goto out;
	}
	/* The stored path must be NUL-terminated within read_size bytes */
	if (strnlen(ctx->dst, read_size) >= read_size) {
		err = -EUCLEAN;
		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path not end with \\0",
			  conn->owner, conn->device_id, ctx->inum);
		goto out;
	}
	/* TODO: Pick a valid path from all paths */

out:
	return err;
}
1241
hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx, unsigned int rw_flag, struct file **filp)1242 static int hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx,
1243 unsigned int rw_flag, struct file **filp)
1244 {
1245 struct hmdfs_peer *conn = ctx->conn;
1246 struct file *dst = NULL;
1247 int err = 0;
1248
1249 err = hmdfs_get_restore_file_metadata(ctx);
1250 if (err)
1251 goto out;
1252
1253 /* Error comes from connection or server ? */
1254 dst = file_open_root(&ctx->dst_root_path,
1255 ctx->dst, O_LARGEFILE | rw_flag, 0);
1256 if (IS_ERR(dst)) {
1257 err = PTR_ERR(dst);
1258 hmdfs_err("open remote file ino 0x%llx err %d", ctx->inum, err);
1259 if (hmdfs_is_node_offlined(conn, ctx->seq))
1260 err = -ESHUTDOWN;
1261 goto out;
1262 }
1263
1264 *filp = dst;
1265 out:
1266 return err;
1267 }
1268
hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx, struct hmdfs_inode_info *pinned, struct file *opened_file)1269 static bool hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx,
1270 struct hmdfs_inode_info *pinned,
1271 struct file *opened_file)
1272 {
1273 struct hmdfs_inode_info *opened = hmdfs_i(file_inode(opened_file));
1274
1275 if (opened->inode_type != HMDFS_LAYER_OTHER_REMOTE)
1276 goto abort;
1277
1278 if (opened == pinned)
1279 return false;
1280
1281 abort:
1282 hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx invalid remote file",
1283 ctx->conn->owner, ctx->conn->device_id, ctx->inum);
1284 hmdfs_warning("got: peer 0x%x:0x%llx inode 0x%llx type %d status %d",
1285 opened->conn ? opened->conn->owner : 0,
1286 opened->conn ? opened->conn->device_id : 0,
1287 opened->remote_ino, opened->inode_type,
1288 opened->stash_status);
1289 hmdfs_warning("pinned: peer 0x%x:0x%llx inode 0x%llx type %d status %d",
1290 pinned->conn->owner, pinned->conn->device_id,
1291 pinned->remote_ino, pinned->inode_type,
1292 pinned->stash_status);
1293 return true;
1294 }
1295
hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx, struct file *dst, struct hmdfs_copy_args *args)1296 static void hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx,
1297 struct file *dst, struct hmdfs_copy_args *args)
1298 {
1299 args->src = ctx->src_filp;
1300 args->dst = dst;
1301 args->buf = ctx->page;
1302 args->buf_len = PAGE_SIZE;
1303 args->seq = ctx->seq;
1304 args->data_offs = ctx->data_offs;
1305 args->inum = ctx->inum;
1306 }
1307
/*
 * Write @len bytes from @buf to the remote file @filp at offset @pos
 * via the remote write-iter path, bypassing the usual checks.
 *
 * Returns 0 on a full write; a short or failed write is logged and
 * reported as the negative write error, or -EFAULT if the write was
 * merely short.
 */
static ssize_t hmdfs_write_dst(struct hmdfs_peer *conn, struct file *filp,
			       void *buf, size_t len, loff_t pos)
{
	struct kiocb kiocb;
	struct iovec iov;
	struct iov_iter iter;
	ssize_t wr;
	int err = 0;

	/* Pair with file_end_write() below for sb write protection */
	file_start_write(filp);

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = pos;

	/* Build a single-segment iov_iter over the bounce buffer */
	iov.iov_base = buf;
	iov.iov_len = len;
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	wr = hmdfs_file_write_iter_remote_nocheck(&kiocb, &iter);

	file_end_write(filp);

	if (wr != len) {
		struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));

		hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short write ret %zd exp %zu",
			  conn->owner, conn->device_id, info->remote_ino,
			  wr, len);
		err = wr < 0 ? (int)wr : -EFAULT;
	}

	return err;
}
1341
hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn, struct hmdfs_copy_ctx *ctx)1342 static int hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn,
1343 struct hmdfs_copy_ctx *ctx)
1344 {
1345 const struct hmdfs_copy_args *args = NULL;
1346 int err = 0;
1347 loff_t rd_pos;
1348 ssize_t rd;
1349
1350 ctx->eof = false;
1351 ctx->copied = 0;
1352
1353 args = &ctx->args;
1354 rd_pos = ctx->src_pos;
1355 rd = kernel_read(args->src, args->buf, args->buf_len, &rd_pos);
1356 if (rd < 0) {
1357 err = (int)rd;
1358 hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short read err %d",
1359 conn->owner, conn->device_id, args->inum, err);
1360 goto out;
1361 } else if (rd == 0) {
1362 ctx->eof = true;
1363 goto out;
1364 }
1365
1366 err = hmdfs_write_dst(conn, args->dst, args->buf, rd, ctx->dst_pos);
1367 if (!err)
1368 ctx->copied = rd;
1369 else if (hmdfs_is_node_offlined(conn, args->seq))
1370 err = -ESHUTDOWN;
1371 out:
1372 return err;
1373 }
1374
/*
 * Copy all stashed data blocks from the stash file to the remote file.
 *
 * Starting at the data area (args->data_offs blocks into the stash
 * file), uses SEEK_DATA to skip holes so only written extents are
 * copied; destination offsets are the source offsets minus the data
 * area start.  The loop ends at EOF/-ENXIO, on error, or when the
 * copied position reaches the stash file size.
 */
static int hmdfs_copy_src_to_dst(struct hmdfs_peer *conn,
				 const struct hmdfs_copy_args *args)
{
	int err = 0;
	struct file *src = NULL;
	struct hmdfs_copy_ctx ctx;
	loff_t seek_pos, data_init_pos;
	loff_t src_size;

	ctx.args = *args;

	src = ctx.args.src;
	data_init_pos = (loff_t)ctx.args.data_offs << HMDFS_STASH_BLK_SHIFT;
	seek_pos = data_init_pos;
	src_size = i_size_read(file_inode(src));
	while (true) {
		loff_t data_pos;

		data_pos = vfs_llseek(src, seek_pos, SEEK_DATA);
		if (data_pos > seek_pos) {
			/* Jumped over a hole; retry from the data start */
			seek_pos = data_pos;
			continue;
		} else if (data_pos < 0) {
			if (data_pos == -ENXIO) {
				/* No more data: normal end of copy */
				loff_t src_blks = file_inode(src)->i_blocks;

				hmdfs_info("peer 0x%x:0x%llx ino 0x%llx end at 0x%llx (sz 0x%llx blk 0x%llx)",
					   conn->owner, conn->device_id,
					   args->inum, seek_pos,
					   src_size, src_blks);
			} else {
				err = (int)data_pos;
				hmdfs_err("peer 0x%x:0x%llx ino 0x%llx seek pos 0x%llx err %d",
					  conn->owner, conn->device_id,
					  args->inum, seek_pos, err);
			}
			break;
		}

		hmdfs_debug("peer 0x%x:0x%llx ino 0x%llx seek to 0x%llx",
			    conn->owner, conn->device_id, args->inum, data_pos);

		ctx.src_pos = data_pos;
		ctx.dst_pos = data_pos - data_init_pos;
		err = hmdfs_rd_src_wr_dst(conn, &ctx);
		if (err || ctx.eof)
			break;

		seek_pos += ctx.copied;
		if (seek_pos >= src_size)
			break;
	}

	return err;
}
1430
/*
 * Restore the stash file's data into the opened remote file @dst and
 * sync it to the server.
 *
 * On any failure the destination page cache is truncated so partially
 * restored pages are not kept.  The source's page cache is always
 * invalidated afterwards since the stash data is no longer needed.
 * An fsync failure with the peer offline is reported as -ESHUTDOWN.
 */
static int hmdfs_restore_src_to_dst(struct hmdfs_file_restore_ctx *ctx,
				    struct file *dst)
{
	struct file *src = ctx->src_filp;
	struct hmdfs_copy_args args;
	int err;

	hmdfs_init_copy_args(ctx, dst, &args);
	err = hmdfs_copy_src_to_dst(ctx->conn, &args);
	if (err)
		goto out;

	err = vfs_fsync(dst, 0);
	if (err) {
		hmdfs_err("fsync remote file ino 0x%llx err %d", ctx->inum, err);
		if (hmdfs_is_node_offlined(ctx->conn, ctx->seq))
			err = -ESHUTDOWN;
	}

out:
	if (err)
		truncate_inode_pages(file_inode(dst)->i_mapping, 0);

	/* Remove the unnecessary cache */
	invalidate_mapping_pages(file_inode(src)->i_mapping, 0, -1);

	return err;
}
1459
1460
/*
 * Restore a single stashed file back to its remote peer.
 *
 * The inode must have been pinned in HMDFS_REMOTE_INODE_RESTORING state
 * (by the rebuild phase).  Opens the remote destination from the stash
 * metadata, verifies it is the pinned inode, and copies the data over.
 * If the peer goes offline mid-restore (-ESHUTDOWN), ctx->keep is set
 * so the stash file is preserved for the next online cycle; otherwise
 * the pinned inode is reset regardless of success.
 */
static int hmdfs_restore_file(struct hmdfs_file_restore_ctx *ctx)
{
	struct hmdfs_peer *conn = ctx->conn;
	uint64_t inum = ctx->inum;
	struct hmdfs_inode_info *pinned_info = NULL;
	struct file *dst_filp = NULL;
	int err = 0;
	bool keep = false;

	hmdfs_info("peer 0x%x:0x%llx ino 0x%llx do restore",
		   conn->owner, conn->device_id, inum);

	pinned_info = hmdfs_lookup_stash_inode(conn, inum);
	if (pinned_info) {
		unsigned int status = READ_ONCE(pinned_info->stash_status);

		if (status != HMDFS_REMOTE_INODE_RESTORING) {
			hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid status %u",
				  conn->owner, conn->device_id, inum, status);
			err = -EINVAL;
			goto clean;
		}
	} else {
		hmdfs_warning("peer 0x%x:0x%llx ino 0x%llx doesn't being pinned",
			      conn->owner, conn->device_id, inum);
		err = -EINVAL;
		goto clean;
	}

	/* Force a fresh remote open instead of reusing a stale fid */
	set_bit(HMDFS_FID_NEED_OPEN, &pinned_info->fid_flags);
	err = hmdfs_open_restore_dst_file(ctx, O_RDWR, &dst_filp);
	if (err) {
		if (err == -ESHUTDOWN)
			keep = true;
		goto clean;
	}

	if (hmdfs_need_abort_restore(ctx, pinned_info, dst_filp))
		goto abort;

	err = hmdfs_restore_src_to_dst(ctx, dst_filp);
	if (err == -ESHUTDOWN)
		keep = true;
abort:
	fput(dst_filp);
clean:
	if (pinned_info && !keep)
		hmdfs_reset_stashed_inode(conn, pinned_info);
	ctx->keep = keep;

	hmdfs_info("peer 0x%x:0x%llx ino 0x%llx restore err %d keep %d",
		   conn->owner, conn->device_id, inum, err, ctx->keep);

	return err;
}
1516
hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn, unsigned int seq, struct path *src_dir, struct hmdfs_file_restore_ctx *ctx)1517 static int hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn,
1518 unsigned int seq, struct path *src_dir,
1519 struct hmdfs_file_restore_ctx *ctx)
1520 {
1521 struct hmdfs_sb_info *sbi = conn->sbi;
1522 struct path dst_root;
1523 char *dst = NULL;
1524 char *page = NULL;
1525 int err = 0;
1526
1527 err = hmdfs_get_path_in_sb(sbi->sb, sbi->real_dst, LOOKUP_DIRECTORY,
1528 &dst_root);
1529 if (err)
1530 return err;
1531
1532 dst = kmalloc(PATH_MAX, GFP_KERNEL);
1533 if (!dst) {
1534 err = -ENOMEM;
1535 goto put_path;
1536 }
1537
1538 page = kmalloc(PAGE_SIZE, GFP_KERNEL);
1539 if (!page) {
1540 err = -ENOMEM;
1541 goto free_dst;
1542 }
1543
1544 ctx->conn = conn;
1545 ctx->src_dir_path = *src_dir;
1546 ctx->dst_root_path = dst_root;
1547 ctx->dst = dst;
1548 ctx->page = page;
1549 ctx->seq = seq;
1550
1551 return 0;
1552 free_dst:
1553 kfree(dst);
1554 put_path:
1555 path_put(&dst_root);
1556 return err;
1557 }
1558
hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx)1559 static void hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx)
1560 {
1561 path_put(&ctx->dst_root_path);
1562 kfree(ctx->dst);
1563 kfree(ctx->page);
1564 }
1565
/*
 * Look up and open the stash file @name inside the stash directory
 * @p_path read-only.
 *
 * The lookup runs under the parent's I_MUTEX_PARENT lock; only positive
 * regular-file dentries are accepted.  The child's reference is dropped
 * once dentry_open() has taken its own.  Returns the opened file or an
 * ERR_PTR on failure.
 */
static struct file *hmdfs_open_stash_file(struct path *p_path, char *name)
{
	struct dentry *parent = NULL;
	struct inode *dir = NULL;
	struct dentry *child = NULL;
	struct file *filp = NULL;
	struct path c_path;
	int err = 0;

	parent = p_path->dentry;
	dir = d_inode(parent);
	inode_lock_nested(dir, I_MUTEX_PARENT);
	child = lookup_one_len(name, parent, strlen(name));
	if (!IS_ERR(child) && !hmdfs_is_reg(child)) {
		/* Found something, but it is not a regular file */
		if (d_is_positive(child)) {
			hmdfs_err("invalid stash file (mode 0%o)",
				  d_inode(child)->i_mode);
			err = -EINVAL;
		} else {
			hmdfs_err("missing stash file");
			err = -ENOENT;
		}
		dput(child);
	} else if (IS_ERR(child)) {
		err = PTR_ERR(child);
		hmdfs_err("lookup stash file err %d", err);
	}
	inode_unlock(dir);

	if (err)
		return ERR_PTR(err);

	c_path.mnt = p_path->mnt;
	c_path.dentry = child;
	filp = dentry_open(&c_path, O_RDONLY | O_LARGEFILE, current_cred());
	if (IS_ERR(filp))
		hmdfs_err("open stash file err %d", (int)PTR_ERR(filp));

	/* dentry_open() holds its own reference now */
	dput(child);

	return filp;
}
1608
/* Account one finished restore attempt in @stats */
static void hmdfs_update_restore_stats(struct hmdfs_restore_stats *stats,
				       bool keep, uint64_t pages, int err)
{
	if (err) {
		if (keep) {
			/* Stash file kept for a later retry */
			stats->keep++;
		} else {
			stats->fail++;
			stats->fail_pages += pages;
		}
	} else {
		stats->succeed++;
		stats->ok_pages += pages;
	}
}
1622
/*
 * Stash-operation callback: restore every inode listed in @tbl.
 *
 * Per-file failures only update @priv (struct hmdfs_restore_stats) and
 * the loop continues; -ESHUTDOWN (peer offline again) stops the batch.
 * Stash files are deleted after restore unless ctx.keep was set to
 * preserve them for the next online cycle.
 */
static int hmdfs_restore_files(struct hmdfs_peer *conn,
			       unsigned int seq, struct path *dir,
			       const struct hmdfs_inode_tbl *tbl,
			       void *priv)
{
	unsigned int i;
	struct hmdfs_file_restore_ctx ctx;
	int err = 0;
	struct hmdfs_restore_stats *stats = priv;

	err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx);
	if (err)
		return err;

	for (i = 0; i < tbl->cnt; i++) {
		char name[HMDFS_STASH_FILE_NAME_LEN];
		struct file *filp = NULL;

		/* Stash files are named by hex remote inode number */
		snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]);
		filp = hmdfs_open_stash_file(dir, name);
		/* Continue to restore if any error */
		if (IS_ERR(filp)) {
			stats->fail++;
			continue;
		}

		ctx.inum = tbl->inodes[i];
		ctx.src_filp = filp;
		ctx.keep = false;
		ctx.pages = 0;
		err = hmdfs_restore_file(&ctx);
		hmdfs_update_restore_stats(stats, ctx.keep, ctx.pages, err);

		if (!ctx.keep)
			hmdfs_del_stash_file(dir->dentry,
					     file_dentry(ctx.src_filp));
		fput(ctx.src_filp);

		/* Continue to restore */
		if (err == -ESHUTDOWN)
			break;
		err = 0;
	}

	hmdfs_exit_file_restore_ctx(&ctx);

	return err;
}
1671
hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info, uint64_t ino)1672 static bool hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info,
1673 uint64_t ino)
1674 {
1675 return (inode_info->inode_type == HMDFS_LAYER_OTHER_REMOTE &&
1676 inode_info->stash_status == HMDFS_REMOTE_INODE_RESTORING &&
1677 inode_info->remote_ino == ino);
1678 }
1679
/*
 * Stash-operation callback: re-pin stashed inodes after a reboot.
 *
 * For each stash file in @tbl, opens the remote destination read-only;
 * the open path is expected to transition the inode into restoring
 * state, which is then verified.  Per-file failures only update @priv
 * (struct hmdfs_rebuild_stats); -ESHUTDOWN aborts the batch.
 */
static int hmdfs_rebuild_stash_list(struct hmdfs_peer *conn,
				    unsigned int seq,
				    struct path *dir,
				    const struct hmdfs_inode_tbl *tbl,
				    void *priv)
{
	struct hmdfs_file_restore_ctx ctx;
	unsigned int i;
	int err;
	struct hmdfs_rebuild_stats *stats = priv;

	err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx);
	if (err)
		return err;

	stats->total += tbl->cnt;

	for (i = 0; i < tbl->cnt; i++) {
		char name[HMDFS_STASH_FILE_NAME_LEN];
		struct file *src_filp = NULL;
		struct file *dst_filp = NULL;
		struct hmdfs_inode_info *inode_info = NULL;
		bool is_valid = true;

		snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]);
		src_filp = hmdfs_open_stash_file(dir, name);
		if (IS_ERR(src_filp)) {
			stats->fail++;
			continue;
		}
		ctx.inum = tbl->inodes[i];
		ctx.src_filp = src_filp;

		/* No need to track the open which only needs meta info */
		err = hmdfs_open_restore_dst_file(&ctx, O_RDONLY, &dst_filp);
		if (err) {
			fput(src_filp);
			if (err == -ESHUTDOWN)
				break;
			stats->fail++;
			err = 0;
			continue;
		}

		inode_info = hmdfs_i(file_inode(dst_filp));
		is_valid = hmdfs_is_valid_stash_status(inode_info,
						       ctx.inum);
		if (is_valid) {
			stats->succeed++;
		} else {
			hmdfs_err("peer 0x%x:0x%llx inode 0x%llx invalid state: type: %d, status: %u, inode: %llu",
				  conn->owner, conn->device_id, ctx.inum,
				  inode_info->inode_type,
				  READ_ONCE(inode_info->stash_status),
				  inode_info->remote_ino);
			stats->invalid++;
		}

		fput(ctx.src_filp);
		fput(dst_filp);
	}

	hmdfs_exit_file_restore_ctx(&ctx);
	return err;
}
1745
/*
 * Iterate the per-peer stash directory @filp in batches.
 *
 * Fills an inode table via the hmdfs_fill_stash_file actor and invokes
 * @op on each full (or final partial) batch, resuming iteration from
 * the saved dctx.pos until the directory is exhausted, @op fails, or
 * iteration errors out.
 */
static int hmdfs_iter_stash_file(struct hmdfs_peer *conn,
				 unsigned int seq,
				 struct file *filp,
				 stash_operation_func op,
				 void *priv)
{
	int err = 0;
	struct hmdfs_stash_dir_context ctx = {
		.dctx.actor = hmdfs_fill_stash_file,
	};
	struct hmdfs_inode_tbl *tbl = NULL;
	struct path dir;

	err = hmdfs_new_inode_tbl(&tbl);
	if (err)
		goto out;

	dir.mnt = filp->f_path.mnt;
	dir.dentry = file_dentry(filp);

	ctx.tbl = tbl;
	ctx.dctx.pos = 0;
	do {
		/* Restart filling; dctx.pos keeps the directory position */
		tbl->cnt = 0;
		err = iterate_dir(filp, &ctx.dctx);
		if (err || !tbl->cnt) {
			if (err)
				hmdfs_err("iterate stash dir err %d", err);
			break;
		}
		err = op(conn, seq, &dir, tbl, priv);
	} while (!err);

out:
	kfree(tbl);
	return err;
}
1783
/*
 * Workqueue function: check whether this peer's stash directory
 * contains any stash file, and if so flag the connection as needing a
 * stash-list rebuild.  Runs with the sb's stored credentials and
 * signals work->done when finished.
 */
static void hmdfs_rebuild_check_work_fn(struct work_struct *base)
{
	struct hmdfs_check_work *work =
		container_of(base, struct hmdfs_check_work, work);
	struct hmdfs_peer *conn = work->conn;
	struct hmdfs_sb_info *sbi = conn->sbi;
	struct file *filp = NULL;
	const struct cred *old_cred = NULL;
	struct hmdfs_stash_dir_context ctx = {
		.dctx.actor = hmdfs_has_stash_file,
	};
	struct hmdfs_inode_tbl tbl;
	int err;

	old_cred = hmdfs_override_creds(sbi->cred);
	filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
	if (IS_ERR(filp))
		goto out;

	/* The actor stops at the first stash file it sees */
	memset(&tbl, 0, sizeof(tbl));
	ctx.tbl = &tbl;
	err = iterate_dir(filp, &ctx.dctx);
	if (!err && ctx.tbl->cnt > 0)
		conn->need_rebuild_stash_list = true;

	fput(filp);
out:
	hmdfs_revert_creds(old_cred);
	hmdfs_info("peer 0x%x:0x%llx %sneed to rebuild stash list",
		   conn->owner, conn->device_id,
		   conn->need_rebuild_stash_list ? "" : "don't ");
	complete(&work->done);
}
1817
hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt, unsigned int seq)1818 static void hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt,
1819 unsigned int seq)
1820 {
1821 struct hmdfs_sb_info *sbi = conn->sbi;
1822 struct hmdfs_check_work work = {
1823 .conn = conn,
1824 .done = COMPLETION_INITIALIZER_ONSTACK(work.done),
1825 };
1826
1827 if (!hmdfs_is_stash_enabled(sbi))
1828 return;
1829
1830 INIT_WORK_ONSTACK(&work.work, hmdfs_rebuild_check_work_fn);
1831 schedule_work(&work.work);
1832 wait_for_completion(&work.done);
1833 }
1834
/*
 * Fold one rebuild round's counters into the peer's statistics:
 * cur_* reflect the latest round, total_* accumulate across rounds.
 */
static void
hmdfs_update_peer_rebuild_stats(struct hmdfs_rebuild_statistics *rebuild_stats,
				const struct hmdfs_rebuild_stats *stats)
{
	rebuild_stats->cur_ok = stats->succeed;
	rebuild_stats->cur_fail = stats->fail;
	rebuild_stats->cur_invalid = stats->invalid;
	rebuild_stats->total_ok += stats->succeed;
	rebuild_stats->total_fail += stats->fail;
	rebuild_stats->total_invalid += stats->invalid;
}
1846
/*
 * Online-prepare callback: rebuild the stash inode list for this peer.
 *
 * Only runs when stash is enabled and a previous check flagged the
 * peer as needing a rebuild.  conn->seq_lock is temporarily released
 * so non-online sync callbacks are not blocked while the (potentially
 * slow) directory scan runs, and reacquired before returning.
 */
static void hmdfs_stash_online_prepare(struct hmdfs_peer *conn, int evt,
				       unsigned int seq)
{
	struct hmdfs_sb_info *sbi = conn->sbi;
	struct file *filp = NULL;
	const struct cred *old_cred = NULL;
	int err;
	struct hmdfs_rebuild_stats stats;

	if (!hmdfs_is_stash_enabled(sbi) ||
	    !conn->need_rebuild_stash_list)
		return;

	/* release seq_lock to prevent blocking no-online sync cb */
	mutex_unlock(&conn->seq_lock);
	old_cred = hmdfs_override_creds(sbi->cred);
	filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
	if (IS_ERR(filp))
		goto out;

	memset(&stats, 0, sizeof(stats));
	err = hmdfs_iter_stash_file(conn, seq, filp,
				    hmdfs_rebuild_stash_list, &stats);
	if (err == -ESHUTDOWN) {
		/* Keep need_rebuild_stash_list set so we retry next online */
		hmdfs_info("peer 0x%x:0x%llx offline again during rebuild",
			   conn->owner, conn->device_id);
	} else {
		WRITE_ONCE(conn->need_rebuild_stash_list, false);
		if (err)
			hmdfs_warning("partial rebuild fail err %d", err);
	}

	hmdfs_update_peer_rebuild_stats(&conn->stats.rebuild, &stats);
	hmdfs_info("peer 0x%x:0x%llx rebuild stashed-file total %u succeed %u fail %u invalid %u",
		   conn->owner, conn->device_id, stats.total, stats.succeed,
		   stats.fail, stats.invalid);
	fput(filp);
out:
	conn->stats.rebuild.time++;
	hmdfs_revert_creds(old_cred);
	if (!READ_ONCE(conn->need_rebuild_stash_list)) {
		/*
		 * Use smp_mb__before_atomic() to ensure order between
		 * writing @conn->need_rebuild_stash_list and
		 * reading conn->rebuild_inode_status_nr.
		 */
		smp_mb__before_atomic();
		/*
		 * Wait until all inodes finish rebuilding stash status before
		 * accessing @conn->stashed_inode_list in restoring.
		 */
		wait_event(conn->rebuild_inode_status_wq,
			   !atomic_read(&conn->rebuild_inode_status_nr));
	}
	mutex_lock(&conn->seq_lock);
}
1904
/*
 * Fold one restore round's counters into the peer's statistics:
 * cur_* reflect the latest round, the rest accumulate across rounds.
 */
static void
hmdfs_update_peer_restore_stats(struct hmdfs_restore_statistics *restore_stats,
				const struct hmdfs_restore_stats *stats)
{
	restore_stats->cur_ok = stats->succeed;
	restore_stats->cur_fail = stats->fail;
	restore_stats->cur_keep = stats->keep;
	restore_stats->total_ok += stats->succeed;
	restore_stats->total_fail += stats->fail;
	restore_stats->total_keep += stats->keep;
	restore_stats->ok_pages += stats->ok_pages;
	restore_stats->fail_pages += stats->fail_pages;
}
1918
/*
 * Online callback: restore all stashed files for this peer.
 *
 * Skipped when stash is disabled or a rebuild is still pending (the
 * rebuild must pin inodes before restore can run).  conn->seq_lock is
 * released around the restore so non-online sync callbacks are not
 * blocked, and reacquired before returning.  Unless the peer went
 * offline again mid-restore, any still-pinned inodes are dropped.
 */
static void hmdfs_stash_online_do_restore(struct hmdfs_peer *conn, int evt,
					  unsigned int seq)
{
	struct hmdfs_sb_info *sbi = conn->sbi;
	struct file *filp = NULL;
	const struct cred *old_cred = NULL;
	struct hmdfs_restore_stats stats;
	int err = 0;

	if (!hmdfs_is_stash_enabled(sbi) || conn->need_rebuild_stash_list) {
		if (conn->need_rebuild_stash_list)
			hmdfs_info("peer 0x%x:0x%llx skip restoring due to rebuild-need",
				   conn->owner, conn->device_id);
		return;
	}

	/* release seq_lock to prevent blocking no-online sync cb */
	mutex_unlock(&conn->seq_lock);
	/* For dir iteration, file read and unlink */
	old_cred = hmdfs_override_creds(conn->sbi->cred);

	memset(&stats, 0, sizeof(stats));
	filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
	if (IS_ERR(filp)) {
		err = PTR_ERR(filp);
		goto out;
	}

	err = hmdfs_iter_stash_file(conn, seq, filp,
				    hmdfs_restore_files, &stats);

	fput(filp);
out:
	hmdfs_revert_creds(old_cred);

	/* offline again ? */
	if (err != -ESHUTDOWN)
		hmdfs_drop_stashed_inodes(conn);

	hmdfs_update_peer_restore_stats(&conn->stats.restore, &stats);
	hmdfs_info("peer 0x%x:0x%llx restore stashed-file ok %u fail %u keep %u",
		   conn->owner, conn->device_id,
		   stats.succeed, stats.fail, stats.keep);

	mutex_lock(&conn->seq_lock);
}
1965
/*
 * Node-del callback: release stash state held for a departing peer.
 *
 * First, any write-opened inodes still preparing a stash (status
 * HMDFS_REMOTE_INODE_STASHING) have their cache detached under
 * stash_lock, are removed from the write-opened list, and their inode
 * reference dropped.  Then every inode pinned on the stashed-inode
 * list is untracked.
 */
static void hmdfs_stash_del_do_cleanup(struct hmdfs_peer *conn, int evt,
				       unsigned int seq)
{
	struct hmdfs_inode_info *info = NULL;
	struct hmdfs_inode_info *next = NULL;
	unsigned int preparing;

	if (!hmdfs_is_stash_enabled(conn->sbi))
		return;

	/* Async cb is cancelled */
	preparing = 0;
	list_for_each_entry_safe(info, next, &conn->wr_opened_inode_list,
				 wr_opened_node) {
		int status = READ_ONCE(info->stash_status);

		if (status == HMDFS_REMOTE_INODE_STASHING) {
			struct hmdfs_cache_info *cache = NULL;

			/* Detach the cache under stash_lock, free it after */
			spin_lock(&info->stash_lock);
			cache = info->cache;
			info->cache = NULL;
			info->stash_status = HMDFS_REMOTE_INODE_NONE;
			spin_unlock(&info->stash_lock);

			hmdfs_remote_del_wr_opened_inode(conn, info);
			hmdfs_del_file_cache(cache);
			/* put inode after all access are completed */
			iput(&info->vfs_inode);
			preparing++;
		}
	}
	hmdfs_info("release %u preparing inodes", preparing);

	hmdfs_info("release %u pinned inodes", conn->stashed_inode_nr);
	if (list_empty(&conn->stashed_inode_list))
		return;

	list_for_each_entry_safe(info, next,
				 &conn->stashed_inode_list, stash_node)
		hmdfs_untrack_stashed_inode(conn, info);
}
2008
hmdfs_exit_stash(struct hmdfs_sb_info *sbi)2009 void hmdfs_exit_stash(struct hmdfs_sb_info *sbi)
2010 {
2011 if (!sbi->s_offline_stash)
2012 return;
2013
2014 if (sbi->stash_work_dir.dentry) {
2015 path_put(&sbi->stash_work_dir);
2016 sbi->stash_work_dir.dentry = NULL;
2017 }
2018 }
2019
hmdfs_init_stash(struct hmdfs_sb_info *sbi)2020 int hmdfs_init_stash(struct hmdfs_sb_info *sbi)
2021 {
2022 int err = 0;
2023 struct path parent;
2024 struct dentry *child = NULL;
2025
2026 if (!sbi->s_offline_stash)
2027 return 0;
2028
2029 err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
2030 &parent);
2031 if (err) {
2032 hmdfs_err("invalid cache dir err %d", err);
2033 goto out;
2034 }
2035
2036 child = hmdfs_stash_new_work_dir(parent.dentry);
2037 if (!IS_ERR(child)) {
2038 sbi->stash_work_dir.mnt = mntget(parent.mnt);
2039 sbi->stash_work_dir.dentry = child;
2040 } else {
2041 err = PTR_ERR(child);
2042 hmdfs_err("create stash work dir err %d", err);
2043 }
2044
2045 path_put(&parent);
2046 out:
2047 return err;
2048 }
2049
/*
 * Write one page of remote-file data into the local stash cache file.
 *
 * The page's file offset is shifted up by the cache's data area start
 * (data_offs blocks).  The write runs with NOFS allocations (we are on
 * the writeback path) and the sb's stored credentials.
 *
 * Returns 0 on a full write, -EIO on any short or failed write.
 */
static int hmdfs_stash_write_local_file(struct hmdfs_peer *conn,
					struct hmdfs_inode_info *info,
					struct hmdfs_writepage_context *ctx,
					struct hmdfs_cache_info *cache)
{
	struct page *page = ctx->page;
	const struct cred *old_cred = NULL;
	void *buf = NULL;
	loff_t pos;
	unsigned int flags;
	ssize_t written;
	int err = 0;

	buf = kmap(page);
	pos = (loff_t)page->index << PAGE_SHIFT;
	/* enable NOFS for memory allocation */
	flags = memalloc_nofs_save();
	old_cred = hmdfs_override_creds(conn->sbi->cred);
	/* Data area starts data_offs blocks into the stash file */
	pos += cache->data_offs << HMDFS_STASH_BLK_SHIFT;
	written = kernel_write(cache->cache_file, buf, ctx->count, &pos);
	hmdfs_revert_creds(old_cred);
	memalloc_nofs_restore(flags);
	kunmap(page);

	if (written != ctx->count) {
		hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx page 0x%lx data_offs 0x%x len %u err %zd",
			  conn->owner, conn->device_id, info->remote_ino,
			  page->index, cache->data_offs, ctx->count, written);
		err = -EIO;
	}

	return err;
}
2083
hmdfs_stash_writepage(struct hmdfs_peer *conn, struct hmdfs_writepage_context *ctx)2084 int hmdfs_stash_writepage(struct hmdfs_peer *conn,
2085 struct hmdfs_writepage_context *ctx)
2086 {
2087 struct inode *inode = ctx->page->mapping->host;
2088 struct hmdfs_inode_info *info = hmdfs_i(inode);
2089 struct hmdfs_cache_info *cache = NULL;
2090 int err;
2091
2092 /* e.g. fail to create stash file */
2093 cache = info->cache;
2094 if (!cache)
2095 return -EIO;
2096
2097 err = hmdfs_stash_write_local_file(conn, info, ctx, cache);
2098 if (!err) {
2099 hmdfs_client_writepage_done(info, ctx);
2100 atomic64_inc(&cache->written_pgs);
2101 put_task_struct(ctx->caller);
2102 kfree(ctx);
2103 }
2104 atomic64_inc(&cache->to_write_pgs);
2105
2106 return err;
2107 }
2108
/*
 * Rebuild the stash status of a single remote inode.
 *
 * Looks for a stash file at "<cid>/0x<remote_ino>" under the stash
 * work dir; if a regular file is found there, marks @inode as
 * HMDFS_REMOTE_INODE_RESTORING, takes an extra inode reference and
 * tracks it on the peer's list via hmdfs_track_inode_locked() so a
 * later restore can process it.  All failures (including -ENOMEM and
 * an over-long path) are logged or silently ignored: the inode then
 * simply stays untracked.
 */
static void hmdfs_stash_rebuild_status(struct hmdfs_peer *conn,
				       struct inode *inode)
{
	char *path_str = NULL;
	struct hmdfs_inode_info *info = NULL;
	const struct cred *old_cred = NULL;
	struct path path;
	struct path *stash_path = NULL;
	int err = 0;

	path_str = kmalloc(HMDFS_STASH_PATH_LEN, GFP_KERNEL);
	if (!path_str) {
		err = -ENOMEM;
		return;
	}

	info = hmdfs_i(inode);
	/* Stash files are named "<cid>/0x<remote inode number>" */
	err = snprintf(path_str, HMDFS_STASH_PATH_LEN, "%s/0x%llx",
		       conn->cid, info->remote_ino);
	if (err >= HMDFS_STASH_PATH_LEN) {
		kfree(path_str);
		hmdfs_err("peer 0x%x:0x%llx inode 0x%llx too long name len",
			  conn->owner, conn->device_id, info->remote_ino);
		return;
	}
	/* Lookup must run with the superblock credentials */
	old_cred = hmdfs_override_creds(conn->sbi->cred);
	stash_path = &conn->sbi->stash_work_dir;
	err = vfs_path_lookup(stash_path->dentry, stash_path->mnt,
			      path_str, 0, &path);
	hmdfs_revert_creds(old_cred);
	if (!err) {
		if (hmdfs_is_reg(path.dentry)) {
			WRITE_ONCE(info->stash_status,
				   HMDFS_REMOTE_INODE_RESTORING);
			/* Hold the inode while it sits on the stash list */
			ihold(&info->vfs_inode);
			hmdfs_track_inode_locked(conn, info);
		} else {
			hmdfs_info("peer 0x%x:0x%llx inode 0x%llx unexpected stashed file mode 0%o",
				   conn->owner, conn->device_id,
				   info->remote_ino,
				   d_inode(path.dentry)->i_mode);
		}

		path_put(&path);
	} else if (err && err != -ENOENT) {
		/* -ENOENT just means nothing was stashed for this inode */
		hmdfs_err("peer 0x%x:0x%llx inode 0x%llx find %s err %d",
			  conn->owner, conn->device_id, info->remote_ino,
			  path_str, err);
	}

	kfree(path_str);
}
2161
2162 static inline bool
hmdfs_need_rebuild_inode_stash_status(struct hmdfs_peer *conn, umode_t mode)2163 hmdfs_need_rebuild_inode_stash_status(struct hmdfs_peer *conn, umode_t mode)
2164 {
2165 return hmdfs_is_stash_enabled(conn->sbi) &&
2166 READ_ONCE(conn->need_rebuild_stash_list) &&
2167 (S_ISREG(mode) || S_ISLNK(mode));
2168 }
2169
/*
 * Initialize the stash status of a just-created remote inode.
 *
 * Races against the peer-side code that clears
 * @conn->need_rebuild_stash_list and then waits on
 * @conn->rebuild_inode_status_wq for @conn->rebuild_inode_status_nr
 * to drain: we bump the counter first, use smp_mb__after_atomic() to
 * order that increment before re-reading the flag, and only rebuild
 * if the flag is still set — so the waiter either sees our increment
 * or we see the cleared flag.  Do not reorder these steps.
 */
void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn,
				    struct inode *inode, umode_t mode)
{
	if (!hmdfs_need_rebuild_inode_stash_status(conn, mode))
		return;

	atomic_inc(&conn->rebuild_inode_status_nr);
	/*
	 * Use smp_mb__after_atomic() to ensure order between writing
	 * @conn->rebuild_inode_status_nr and reading
	 * @conn->need_rebuild_stash_list.
	 */
	smp_mb__after_atomic();
	if (READ_ONCE(conn->need_rebuild_stash_list))
		hmdfs_stash_rebuild_status(conn, inode);
	/* Last one out wakes the waiter draining in-flight rebuilds */
	if (atomic_dec_and_test(&conn->rebuild_inode_status_nr))
		wake_up(&conn->rebuild_inode_status_wq);
}
2188
/*
 * Node-event callback table hooking the stash machinery into the peer
 * life cycle.  Entries with .sync = true run synchronously with the
 * event; .sync = false entries run asynchronously.
 */
static struct hmdfs_node_cb_desc stash_cb[] = {
	{
		/* Peer went offline: synchronous preparation step */
		.evt = NODE_EVT_OFFLINE,
		.sync = true,
		.fn = hmdfs_stash_offline_prepare,
	},
	{
		/* Peer went offline: asynchronous stashing step */
		.evt = NODE_EVT_OFFLINE,
		.sync = false,
		.fn = hmdfs_stash_offline_do_stash,
	},
	{
		/* Peer added: synchronous check */
		.evt = NODE_EVT_ADD,
		.sync = true,
		.fn = hmdfs_stash_add_do_check,
	},
	{
		/* Peer back online: asynchronous restore preparation */
		.evt = NODE_EVT_ONLINE,
		.sync = false,
		.fn = hmdfs_stash_online_prepare,
	},
	{
		/* Peer back online: asynchronous restore of stashed data */
		.evt = NODE_EVT_ONLINE,
		.sync = false,
		.fn = hmdfs_stash_online_do_restore,
	},
	{
		/* Peer removed: synchronous cleanup */
		.evt = NODE_EVT_DEL,
		.sync = true,
		.fn = hmdfs_stash_del_do_cleanup,
	},
};
2221
/* Register the stash_cb[] table with the node-event dispatcher. */
void __init hmdfs_stash_add_node_evt_cb(void)
{
	hmdfs_node_add_evt_cb(stash_cb, ARRAY_SIZE(stash_cb));
}
2226
2227