1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/segment.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37 unsigned long tmp = 0;
38 int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41 shift = 56;
42 #endif
43 while (shift >= 0) {
44 tmp |= (unsigned long)str[idx++] << shift;
45 shift -= BITS_PER_BYTE;
46 }
47 return tmp;
48 }
49
50 /*
51 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52 * MSB and LSB are reversed in a byte by f2fs_set_bit.
53 */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56 int num = 0;
57
58 #if BITS_PER_LONG == 64
59 if ((word & 0xffffffff00000000UL) == 0)
60 num += 32;
61 else
62 word >>= 32;
63 #endif
64 if ((word & 0xffff0000) == 0)
65 num += 16;
66 else
67 word >>= 16;
68
69 if ((word & 0xff00) == 0)
70 num += 8;
71 else
72 word >>= 8;
73
74 if ((word & 0xf0) == 0)
75 num += 4;
76 else
77 word >>= 4;
78
79 if ((word & 0xc) == 0)
80 num += 2;
81 else
82 word >>= 2;
83
84 if ((word & 0x2) == 0)
85 num += 1;
86 return num;
87 }
88
89 /*
90 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91 * f2fs_set_bit makes MSB and LSB reversed in a byte.
92 * @size must be an integral multiple of BITS_PER_LONG.
93 * Example:
94 * MSB <--> LSB
95 * f2fs_set_bit(0, bitmap) => 1000 0000
96 * f2fs_set_bit(7, bitmap) => 0000 0001
97 */
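/*
 * For example, with only f2fs_set_bit(5, bitmap) applied:
 *   __find_rev_next_bit(bitmap, 64, 0)      returns 5
 *   __find_rev_next_zero_bit(bitmap, 64, 5) returns 6
 */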
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99 unsigned long size, unsigned long offset)
100 {
101 const unsigned long *p = addr + BIT_WORD(offset);
102 unsigned long result = size;
103 unsigned long tmp;
104
105 if (offset >= size)
106 return size;
107
108 size -= (offset & ~(BITS_PER_LONG - 1));
109 offset %= BITS_PER_LONG;
110
111 while (1) {
112 if (*p == 0)
113 goto pass;
114
115 tmp = __reverse_ulong((unsigned char *)p);
116
117 tmp &= ~0UL >> offset;
118 if (size < BITS_PER_LONG)
119 tmp &= (~0UL << (BITS_PER_LONG - size));
120 if (tmp)
121 goto found;
122 pass:
123 if (size <= BITS_PER_LONG)
124 break;
125 size -= BITS_PER_LONG;
126 offset = 0;
127 p++;
128 }
129 return result;
130 found:
131 return result - size + __reverse_ffs(tmp);
132 }
133
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135 unsigned long size, unsigned long offset)
136 {
137 const unsigned long *p = addr + BIT_WORD(offset);
138 unsigned long result = size;
139 unsigned long tmp;
140
141 if (offset >= size)
142 return size;
143
144 size -= (offset & ~(BITS_PER_LONG - 1));
145 offset %= BITS_PER_LONG;
146
147 while (1) {
148 if (*p == ~0UL)
149 goto pass;
150
151 tmp = __reverse_ulong((unsigned char *)p);
152
153 if (offset)
154 tmp |= ~0UL << (BITS_PER_LONG - offset);
155 if (size < BITS_PER_LONG)
156 tmp |= ~0UL >> size;
157 if (tmp != ~0UL)
158 goto found;
159 pass:
160 if (size <= BITS_PER_LONG)
161 break;
162 size -= BITS_PER_LONG;
163 offset = 0;
164 p++;
165 }
166 return result;
167 found:
168 return result - size + __reverse_ffz(tmp);
169 }
170
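/*
 * Decide whether SSR (Slack Space Recycling) allocation should be used:
 * never in LFS mode, always under urgent-high GC or while checkpointing
 * is disabled, otherwise only when free sections cannot cover the dirty
 * node/dentry/imeta load plus the SSR and reserved section thresholds.
 */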
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177 if (f2fs_lfs_mode(sbi))
178 return false;
179 if (sbi->gc_mode == GC_URGENT_HIGH)
180 return true;
181 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182 return true;
183
184 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
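/*
 * Tear down an in-flight atomic write: drop the atomic-write flags and
 * counters, and, when @clean is set, truncate the cached pages and restore
 * the original i_size.
 */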
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190 struct f2fs_inode_info *fi = F2FS_I(inode);
191
192 if (!f2fs_is_atomic_file(inode))
193 return;
194
195 release_atomic_write_cnt(inode);
196 clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
197 clear_inode_flag(inode, FI_ATOMIC_REPLACE);
198 clear_inode_flag(inode, FI_ATOMIC_FILE);
199 stat_dec_atomic_inode(inode);
200
201 F2FS_I(inode)->atomic_write_task = NULL;
202
203 if (clean) {
204 truncate_inode_pages_final(inode->i_mapping);
205 f2fs_i_size_write(inode, fi->original_i_size);
206 fi->original_i_size = 0;
207 }
208 /* avoid stale dirty inode during eviction */
209 sync_inode_metadata(inode, 0);
210 }
211
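/*
 * Replace the block mapped at @index with @new_addr. On commit the previous
 * address is returned through @old_addr; on revoke (@recover == true) the
 * previously saved address is written back instead.
 */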
212 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
213 block_t new_addr, block_t *old_addr, bool recover)
214 {
215 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
216 struct dnode_of_data dn;
217 struct node_info ni;
218 int err;
219
220 retry:
221 set_new_dnode(&dn, inode, NULL, NULL, 0);
222 err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
223 if (err) {
224 if (err == -ENOMEM) {
225 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
226 goto retry;
227 }
228 return err;
229 }
230
231 err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
232 if (err) {
233 f2fs_put_dnode(&dn);
234 return err;
235 }
236
237 if (recover) {
238 /* dn.data_blkaddr is always valid */
239 if (!__is_valid_data_blkaddr(new_addr)) {
240 if (new_addr == NULL_ADDR)
241 dec_valid_block_count(sbi, inode, 1);
242 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
243 f2fs_update_data_blkaddr(&dn, new_addr);
244 } else {
245 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
246 new_addr, ni.version, true, true);
247 }
248 } else {
249 blkcnt_t count = 1;
250
251 err = inc_valid_block_count(sbi, inode, &count, true);
252 if (err) {
253 f2fs_put_dnode(&dn);
254 return err;
255 }
256
257 *old_addr = dn.data_blkaddr;
258 f2fs_truncate_data_blocks_range(&dn, 1);
259 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
260
261 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
262 ni.version, true, false);
263 }
264
265 f2fs_put_dnode(&dn);
266
267 trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
268 index, old_addr ? *old_addr : 0, new_addr, recover);
269 return 0;
270 }
271
272 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
273 bool revoke)
274 {
275 struct revoke_entry *cur, *tmp;
276 pgoff_t start_index = 0;
277 bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
278
279 list_for_each_entry_safe(cur, tmp, head, list) {
280 if (revoke) {
281 __replace_atomic_write_block(inode, cur->index,
282 cur->old_addr, NULL, true);
283 } else if (truncate) {
284 f2fs_truncate_hole(inode, start_index, cur->index);
285 start_index = cur->index + 1;
286 }
287
288 list_del(&cur->list);
289 kmem_cache_free(revoke_entry_slab, cur);
290 }
291
292 if (!revoke && truncate)
293 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
294 }
295
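/*
 * Walk the COW inode and move every written block back into the original
 * inode, recording each replaced address in a revoke list so the commit
 * can be rolled back if any step fails.
 */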
296 static int __f2fs_commit_atomic_write(struct inode *inode)
297 {
298 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
299 struct f2fs_inode_info *fi = F2FS_I(inode);
300 struct inode *cow_inode = fi->cow_inode;
301 struct revoke_entry *new;
302 struct list_head revoke_list;
303 block_t blkaddr;
304 struct dnode_of_data dn;
305 pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
306 pgoff_t off = 0, blen, index;
307 int ret = 0, i;
308
309 INIT_LIST_HEAD(&revoke_list);
310
311 while (len) {
312 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
313
314 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
315 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
316 if (ret && ret != -ENOENT) {
317 goto out;
318 } else if (ret == -ENOENT) {
319 ret = 0;
320 if (dn.max_level == 0)
321 goto out;
322 goto next;
323 }
324
325 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
326 len);
327 index = off;
328 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
329 blkaddr = f2fs_data_blkaddr(&dn);
330
331 if (!__is_valid_data_blkaddr(blkaddr)) {
332 continue;
333 } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
334 DATA_GENERIC_ENHANCE)) {
335 f2fs_put_dnode(&dn);
336 ret = -EFSCORRUPTED;
337 f2fs_handle_error(sbi,
338 ERROR_INVALID_BLKADDR);
339 goto out;
340 }
341
342 new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
343 true, NULL);
344
345 ret = __replace_atomic_write_block(inode, index, blkaddr,
346 &new->old_addr, false);
347 if (ret) {
348 f2fs_put_dnode(&dn);
349 kmem_cache_free(revoke_entry_slab, new);
350 goto out;
351 }
352
353 f2fs_update_data_blkaddr(&dn, NULL_ADDR);
354 new->index = index;
355 list_add_tail(&new->list, &revoke_list);
356 }
357 f2fs_put_dnode(&dn);
358 next:
359 off += blen;
360 len -= blen;
361 }
362
363 out:
364 if (ret) {
365 sbi->revoked_atomic_block += fi->atomic_write_cnt;
366 } else {
367 sbi->committed_atomic_block += fi->atomic_write_cnt;
368 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
369 }
370
371 __complete_revoke_list(inode, &revoke_list, ret ? true : false);
372
373 return ret;
374 }
375
376 int f2fs_commit_atomic_write(struct inode *inode)
377 {
378 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
379 struct f2fs_inode_info *fi = F2FS_I(inode);
380 int err;
381
382 err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
383 if (err)
384 return err;
385
386 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
387 f2fs_lock_op(sbi);
388
389 err = __f2fs_commit_atomic_write(inode);
390
391 f2fs_unlock_op(sbi);
392 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
393
394 return err;
395 }
396
397 /*
398 * This function balances dirty node and dentry pages.
399 * In addition, it controls garbage collection.
400 */
401 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
402 {
403 if (time_to_inject(sbi, FAULT_CHECKPOINT))
404 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
405
406 /* f2fs_balance_fs_bg() may still be pending, so run it here if needed */
407 if (need && excess_cached_nats(sbi))
408 f2fs_balance_fs_bg(sbi, false);
409
410 if (!f2fs_is_checkpoint_ready(sbi))
411 return;
412
413 /*
414 * If there are too many dirty dir/node pages without enough free
415 * segments, we should do GC, which may end with a checkpoint.
416 */
417 if (has_enough_free_secs(sbi, 0, 0))
418 return;
419
420 if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
421 sbi->gc_thread->f2fs_gc_task) {
422 DEFINE_WAIT(wait);
423
424 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
425 TASK_UNINTERRUPTIBLE);
426 wake_up(&sbi->gc_thread->gc_wait_queue_head);
427 io_schedule();
428 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
429 } else {
430 struct f2fs_gc_control gc_control = {
431 .victim_segno = NULL_SEGNO,
432 .init_gc_type = BG_GC,
433 .no_bg_gc = true,
434 .should_migrate_blocks = false,
435 .err_gc_skipped = false,
436 .nr_free_secs = 1 };
437 f2fs_down_write(&sbi->gc_lock);
438 stat_inc_gc_call_count(sbi, FOREGROUND);
439 f2fs_gc(sbi, &gc_control);
440 }
441 }
442
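/*
 * Return true if any single dirty page type, or the sum of all of them,
 * has grown past the checkpoint-triggering thresholds.
 */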
443 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
444 {
445 int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
446 unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
447 unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
448 unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
449 unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
450 unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
451 unsigned int threshold = sbi->blocks_per_seg * factor *
452 DEFAULT_DIRTY_THRESHOLD;
453 unsigned int global_threshold = threshold * 3 / 2;
454
455 if (dents >= threshold || qdata >= threshold ||
456 nodes >= threshold || meta >= threshold ||
457 imeta >= threshold)
458 return true;
459 return dents + qdata + nodes + meta + imeta > global_threshold;
460 }
461
462 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
463 {
464 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
465 return;
466
467 /* try to shrink the read extent cache when there is not enough memory */
468 if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
469 f2fs_shrink_read_extent_tree(sbi,
470 READ_EXTENT_CACHE_SHRINK_NUMBER);
471
472 /* try to shrink the age extent cache when there is not enough memory */
473 if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
474 f2fs_shrink_age_extent_tree(sbi,
475 AGE_EXTENT_CACHE_SHRINK_NUMBER);
476
477 /* check the # of cached NAT entries */
478 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
479 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
480
481 if (!f2fs_available_free_memory(sbi, FREE_NIDS))
482 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
483 else
484 f2fs_build_free_nids(sbi, false, false);
485
486 if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
487 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
488 goto do_sync;
489
490 /* there is in-flight background IO or there was a recent foreground operation */
491 if (is_inflight_io(sbi, REQ_TIME) ||
492 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
493 return;
494
495 /* the periodic checkpoint timeout threshold has been exceeded */
496 if (f2fs_time_over(sbi, CP_TIME))
497 goto do_sync;
498
499 /* checkpoint is the only way to shrink partial cached entries */
500 if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
501 f2fs_available_free_memory(sbi, INO_ENTRIES))
502 return;
503
504 do_sync:
505 if (test_opt(sbi, DATA_FLUSH) && from_bg) {
506 struct blk_plug plug;
507
508 mutex_lock(&sbi->flush_lock);
509
510 blk_start_plug(&plug);
511 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
512 blk_finish_plug(&plug);
513
514 mutex_unlock(&sbi->flush_lock);
515 }
516 stat_inc_cp_call_count(sbi, BACKGROUND);
517 f2fs_sync_fs(sbi->sb, 1);
518 }
519
520 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
521 struct block_device *bdev)
522 {
523 int ret = blkdev_issue_flush(bdev);
524
525 trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
526 test_opt(sbi, FLUSH_MERGE), ret);
527 if (!ret)
528 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
529 return ret;
530 }
531
532 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
533 {
534 int ret = 0;
535 int i;
536
537 if (!f2fs_is_multi_device(sbi))
538 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
539
540 for (i = 0; i < sbi->s_ndevs; i++) {
541 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
542 continue;
543 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
544 if (ret)
545 break;
546 }
547 return ret;
548 }
549
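/*
 * Kernel thread that merges queued flush requests: it drains the issue
 * list, submits a single flush per batch and completes every waiter with
 * the same result.
 */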
550 static int issue_flush_thread(void *data)
551 {
552 struct f2fs_sb_info *sbi = data;
553 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
554 wait_queue_head_t *q = &fcc->flush_wait_queue;
555 repeat:
556 if (kthread_should_stop())
557 return 0;
558
559 if (!llist_empty(&fcc->issue_list)) {
560 struct flush_cmd *cmd, *next;
561 int ret;
562
563 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
564 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
565
566 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
567
568 ret = submit_flush_wait(sbi, cmd->ino);
569 atomic_inc(&fcc->issued_flush);
570
571 llist_for_each_entry_safe(cmd, next,
572 fcc->dispatch_list, llnode) {
573 cmd->ret = ret;
574 complete(&cmd->wait);
575 }
576 fcc->dispatch_list = NULL;
577 }
578
579 wait_event_interruptible(*q,
580 kthread_should_stop() || !llist_empty(&fcc->issue_list));
581 goto repeat;
582 }
583
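/*
 * Issue a cache flush for @ino (a no-op when NOBARRIER is set). Without
 * FLUSH_MERGE, or when the queue is idle or the fs spans multiple devices,
 * the flush is submitted directly; otherwise the request is queued for the
 * flush-merge thread and waited on.
 */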
584 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
585 {
586 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
587 struct flush_cmd cmd;
588 int ret;
589
590 if (test_opt(sbi, NOBARRIER))
591 return 0;
592
593 if (!test_opt(sbi, FLUSH_MERGE)) {
594 atomic_inc(&fcc->queued_flush);
595 ret = submit_flush_wait(sbi, ino);
596 atomic_dec(&fcc->queued_flush);
597 atomic_inc(&fcc->issued_flush);
598 return ret;
599 }
600
601 if (atomic_inc_return(&fcc->queued_flush) == 1 ||
602 f2fs_is_multi_device(sbi)) {
603 ret = submit_flush_wait(sbi, ino);
604 atomic_dec(&fcc->queued_flush);
605
606 atomic_inc(&fcc->issued_flush);
607 return ret;
608 }
609
610 cmd.ino = ino;
611 init_completion(&cmd.wait);
612
613 llist_add(&cmd.llnode, &fcc->issue_list);
614
615 /*
616 * update issue_list before we wake up the issue_flush thread; this
617 * smp_mb() pairs with another barrier in ___wait_event(), see
618 * the comments of waitqueue_active() for more details.
619 */
620 smp_mb();
621
622 if (waitqueue_active(&fcc->flush_wait_queue))
623 wake_up(&fcc->flush_wait_queue);
624
625 if (fcc->f2fs_issue_flush) {
626 wait_for_completion(&cmd.wait);
627 atomic_dec(&fcc->queued_flush);
628 } else {
629 struct llist_node *list;
630
631 list = llist_del_all(&fcc->issue_list);
632 if (!list) {
633 wait_for_completion(&cmd.wait);
634 atomic_dec(&fcc->queued_flush);
635 } else {
636 struct flush_cmd *tmp, *next;
637
638 ret = submit_flush_wait(sbi, ino);
639
640 llist_for_each_entry_safe(tmp, next, list, llnode) {
641 if (tmp == &cmd) {
642 cmd.ret = ret;
643 atomic_dec(&fcc->queued_flush);
644 continue;
645 }
646 tmp->ret = ret;
647 complete(&tmp->wait);
648 }
649 }
650 }
651
652 return cmd.ret;
653 }
654
655 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
656 {
657 dev_t dev = sbi->sb->s_bdev->bd_dev;
658 struct flush_cmd_control *fcc;
659
660 if (SM_I(sbi)->fcc_info) {
661 fcc = SM_I(sbi)->fcc_info;
662 if (fcc->f2fs_issue_flush)
663 return 0;
664 goto init_thread;
665 }
666
667 fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
668 if (!fcc)
669 return -ENOMEM;
670 atomic_set(&fcc->issued_flush, 0);
671 atomic_set(&fcc->queued_flush, 0);
672 init_waitqueue_head(&fcc->flush_wait_queue);
673 init_llist_head(&fcc->issue_list);
674 SM_I(sbi)->fcc_info = fcc;
675 if (!test_opt(sbi, FLUSH_MERGE))
676 return 0;
677
678 init_thread:
679 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
680 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
681 if (IS_ERR(fcc->f2fs_issue_flush)) {
682 int err = PTR_ERR(fcc->f2fs_issue_flush);
683
684 fcc->f2fs_issue_flush = NULL;
685 return err;
686 }
687
688 return 0;
689 }
690
691 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
692 {
693 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
694
695 if (fcc && fcc->f2fs_issue_flush) {
696 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
697
698 fcc->f2fs_issue_flush = NULL;
699 kthread_stop(flush_thread);
700 }
701 if (free) {
702 kfree(fcc);
703 SM_I(sbi)->fcc_info = NULL;
704 }
705 }
706
707 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
708 {
709 int ret = 0, i;
710
711 if (!f2fs_is_multi_device(sbi))
712 return 0;
713
714 if (test_opt(sbi, NOBARRIER))
715 return 0;
716
717 for (i = 1; i < sbi->s_ndevs; i++) {
718 int count = DEFAULT_RETRY_IO_COUNT;
719
720 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
721 continue;
722
723 do {
724 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
725 if (ret)
726 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
727 } while (ret && --count);
728
729 if (ret) {
730 f2fs_stop_checkpoint(sbi, false,
731 STOP_CP_REASON_FLUSH_FAIL);
732 break;
733 }
734
735 spin_lock(&sbi->dev_lock);
736 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
737 spin_unlock(&sbi->dev_lock);
738 }
739
740 return ret;
741 }
742
743 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
744 enum dirty_type dirty_type)
745 {
746 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
747
748 /* need not be added */
749 if (IS_CURSEG(sbi, segno))
750 return;
751
752 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
753 dirty_i->nr_dirty[dirty_type]++;
754
755 if (dirty_type == DIRTY) {
756 struct seg_entry *sentry = get_seg_entry(sbi, segno);
757 enum dirty_type t = sentry->type;
758
759 if (unlikely(t >= DIRTY)) {
760 f2fs_bug_on(sbi, 1);
761 return;
762 }
763 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
764 dirty_i->nr_dirty[t]++;
765
766 if (__is_large_section(sbi)) {
767 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
768 block_t valid_blocks =
769 get_valid_blocks(sbi, segno, true);
770
771 f2fs_bug_on(sbi, unlikely(!valid_blocks ||
772 valid_blocks == CAP_BLKS_PER_SEC(sbi)));
773
774 if (!IS_CURSEC(sbi, secno))
775 set_bit(secno, dirty_i->dirty_secmap);
776 }
777 }
778 }
779
780 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
781 enum dirty_type dirty_type)
782 {
783 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
784 block_t valid_blocks;
785
786 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
787 dirty_i->nr_dirty[dirty_type]--;
788
789 if (dirty_type == DIRTY) {
790 struct seg_entry *sentry = get_seg_entry(sbi, segno);
791 enum dirty_type t = sentry->type;
792
793 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
794 dirty_i->nr_dirty[t]--;
795
796 valid_blocks = get_valid_blocks(sbi, segno, true);
797 if (valid_blocks == 0) {
798 clear_bit(GET_SEC_FROM_SEG(sbi, segno),
799 dirty_i->victim_secmap);
800 #ifdef CONFIG_F2FS_CHECK_FS
801 clear_bit(segno, SIT_I(sbi)->invalid_segmap);
802 #endif
803 }
804 if (__is_large_section(sbi)) {
805 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
806
807 if (!valid_blocks ||
808 valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
809 clear_bit(secno, dirty_i->dirty_secmap);
810 return;
811 }
812
813 if (!IS_CURSEC(sbi, secno))
814 set_bit(secno, dirty_i->dirty_secmap);
815 }
816 }
817 }
818
819 /*
820 * Errors such as -ENOMEM should not occur here, since adding a dirty
821 * entry into the seglist is not a critical operation.
822 * If a given segment is one of current working segments, it won't be added.
823 */
824 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
825 {
826 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
827 unsigned short valid_blocks, ckpt_valid_blocks;
828 unsigned int usable_blocks;
829
830 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
831 return;
832
833 usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
834 mutex_lock(&dirty_i->seglist_lock);
835
836 valid_blocks = get_valid_blocks(sbi, segno, false);
837 ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
838
839 if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
840 ckpt_valid_blocks == usable_blocks)) {
841 __locate_dirty_segment(sbi, segno, PRE);
842 __remove_dirty_segment(sbi, segno, DIRTY);
843 } else if (valid_blocks < usable_blocks) {
844 __locate_dirty_segment(sbi, segno, DIRTY);
845 } else {
846 /* Recovery routine with SSR needs this */
847 __remove_dirty_segment(sbi, segno, DIRTY);
848 }
849
850 mutex_unlock(&dirty_i->seglist_lock);
851 }
852
853 /* This moves currently empty dirty segments to prefree; seglist_lock is taken internally. */
854 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
855 {
856 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
857 unsigned int segno;
858
859 mutex_lock(&dirty_i->seglist_lock);
860 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
861 if (get_valid_blocks(sbi, segno, false))
862 continue;
863 if (IS_CURSEG(sbi, segno))
864 continue;
865 __locate_dirty_segment(sbi, segno, PRE);
866 __remove_dirty_segment(sbi, segno, DIRTY);
867 }
868 mutex_unlock(&dirty_i->seglist_lock);
869 }
870
871 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
872 {
873 int ovp_hole_segs =
874 (overprovision_segments(sbi) - reserved_segments(sbi));
875 block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
876 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
877 block_t holes[2] = {0, 0}; /* DATA and NODE */
878 block_t unusable;
879 struct seg_entry *se;
880 unsigned int segno;
881
882 mutex_lock(&dirty_i->seglist_lock);
883 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
884 se = get_seg_entry(sbi, segno);
885 if (IS_NODESEG(se->type))
886 holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
887 se->valid_blocks;
888 else
889 holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
890 se->valid_blocks;
891 }
892 mutex_unlock(&dirty_i->seglist_lock);
893
894 unusable = max(holes[DATA], holes[NODE]);
895 if (unusable > ovp_holes)
896 return unusable - ovp_holes;
897 return 0;
898 }
899
900 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
901 {
902 int ovp_hole_segs =
903 (overprovision_segments(sbi) - reserved_segments(sbi));
904 if (unusable > F2FS_OPTION(sbi).unusable_cap)
905 return -EAGAIN;
906 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
907 dirty_segments(sbi) > ovp_hole_segs)
908 return -EAGAIN;
909 return 0;
910 }
911
912 /* This is only used by SBI_CP_DISABLED */
913 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
914 {
915 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
916 unsigned int segno = 0;
917
918 mutex_lock(&dirty_i->seglist_lock);
919 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
920 if (get_valid_blocks(sbi, segno, false))
921 continue;
922 if (get_ckpt_valid_blocks(sbi, segno, false))
923 continue;
924 mutex_unlock(&dirty_i->seglist_lock);
925 return segno;
926 }
927 mutex_unlock(&dirty_i->seglist_lock);
928 return NULL_SEGNO;
929 }
930
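/*
 * Allocate a new discard command for [@start, @start + @len) and add it to
 * the pending list that matches its length.
 */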
931 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
932 struct block_device *bdev, block_t lstart,
933 block_t start, block_t len)
934 {
935 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
936 struct list_head *pend_list;
937 struct discard_cmd *dc;
938
939 f2fs_bug_on(sbi, !len);
940
941 pend_list = &dcc->pend_list[plist_idx(len)];
942
943 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
944 INIT_LIST_HEAD(&dc->list);
945 dc->bdev = bdev;
946 dc->di.lstart = lstart;
947 dc->di.start = start;
948 dc->di.len = len;
949 dc->ref = 0;
950 dc->state = D_PREP;
951 dc->queued = 0;
952 dc->error = 0;
953 init_completion(&dc->wait);
954 list_add_tail(&dc->list, pend_list);
955 spin_lock_init(&dc->lock);
956 dc->bio_ref = 0;
957 atomic_inc(&dcc->discard_cmd_cnt);
958 dcc->undiscard_blks += len;
959
960 return dc;
961 }
962
963 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
964 {
965 #ifdef CONFIG_F2FS_CHECK_FS
966 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
967 struct rb_node *cur = rb_first_cached(&dcc->root), *next;
968 struct discard_cmd *cur_dc, *next_dc;
969
970 while (cur) {
971 next = rb_next(cur);
972 if (!next)
973 return true;
974
975 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
976 next_dc = rb_entry(next, struct discard_cmd, rb_node);
977
978 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
979 f2fs_info(sbi, "broken discard_rbtree, "
980 "cur(%u, %u) next(%u, %u)",
981 cur_dc->di.lstart, cur_dc->di.len,
982 next_dc->di.lstart, next_dc->di.len);
983 return false;
984 }
985 cur = next;
986 }
987 #endif
988 return true;
989 }
990
991 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
992 block_t blkaddr)
993 {
994 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
995 struct rb_node *node = dcc->root.rb_root.rb_node;
996 struct discard_cmd *dc;
997
998 while (node) {
999 dc = rb_entry(node, struct discard_cmd, rb_node);
1000
1001 if (blkaddr < dc->di.lstart)
1002 node = node->rb_left;
1003 else if (blkaddr >= dc->di.lstart + dc->di.len)
1004 node = node->rb_right;
1005 else
1006 return dc;
1007 }
1008 return NULL;
1009 }
1010
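/*
 * Look up the discard command covering @blkaddr in the rb-tree. The
 * previous/next neighbours are returned as well and, on a miss, the
 * insertion point for a new node.
 */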
1011 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1012 block_t blkaddr,
1013 struct discard_cmd **prev_entry,
1014 struct discard_cmd **next_entry,
1015 struct rb_node ***insert_p,
1016 struct rb_node **insert_parent)
1017 {
1018 struct rb_node **pnode = &root->rb_root.rb_node;
1019 struct rb_node *parent = NULL, *tmp_node;
1020 struct discard_cmd *dc;
1021
1022 *insert_p = NULL;
1023 *insert_parent = NULL;
1024 *prev_entry = NULL;
1025 *next_entry = NULL;
1026
1027 if (RB_EMPTY_ROOT(&root->rb_root))
1028 return NULL;
1029
1030 while (*pnode) {
1031 parent = *pnode;
1032 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1033
1034 if (blkaddr < dc->di.lstart)
1035 pnode = &(*pnode)->rb_left;
1036 else if (blkaddr >= dc->di.lstart + dc->di.len)
1037 pnode = &(*pnode)->rb_right;
1038 else
1039 goto lookup_neighbors;
1040 }
1041
1042 *insert_p = pnode;
1043 *insert_parent = parent;
1044
1045 dc = rb_entry(parent, struct discard_cmd, rb_node);
1046 tmp_node = parent;
1047 if (parent && blkaddr > dc->di.lstart)
1048 tmp_node = rb_next(parent);
1049 *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1050
1051 tmp_node = parent;
1052 if (parent && blkaddr < dc->di.lstart)
1053 tmp_node = rb_prev(parent);
1054 *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1055 return NULL;
1056
1057 lookup_neighbors:
1058 /* lookup prev node for merging backward later */
1059 tmp_node = rb_prev(&dc->rb_node);
1060 *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1061
1062 /* lookup next node for merging forward later */
1063 tmp_node = rb_next(&dc->rb_node);
1064 *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1065 return dc;
1066 }
1067
1068 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1069 struct discard_cmd *dc)
1070 {
1071 if (dc->state == D_DONE)
1072 atomic_sub(dc->queued, &dcc->queued_discard);
1073
1074 list_del(&dc->list);
1075 rb_erase_cached(&dc->rb_node, &dcc->root);
1076 dcc->undiscard_blks -= dc->di.len;
1077
1078 kmem_cache_free(discard_cmd_slab, dc);
1079
1080 atomic_dec(&dcc->discard_cmd_cnt);
1081 }
1082
1083 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1084 struct discard_cmd *dc)
1085 {
1086 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1087 unsigned long flags;
1088
1089 trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1090
1091 spin_lock_irqsave(&dc->lock, flags);
1092 if (dc->bio_ref) {
1093 spin_unlock_irqrestore(&dc->lock, flags);
1094 return;
1095 }
1096 spin_unlock_irqrestore(&dc->lock, flags);
1097
1098 f2fs_bug_on(sbi, dc->ref);
1099
1100 if (dc->error == -EOPNOTSUPP)
1101 dc->error = 0;
1102
1103 if (dc->error)
1104 printk_ratelimited(
1105 "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1106 KERN_INFO, sbi->sb->s_id,
1107 dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1108 __detach_discard_cmd(dcc, dc);
1109 }
1110
1111 static void f2fs_submit_discard_endio(struct bio *bio)
1112 {
1113 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1114 unsigned long flags;
1115
1116 spin_lock_irqsave(&dc->lock, flags);
1117 if (!dc->error)
1118 dc->error = blk_status_to_errno(bio->bi_status);
1119 dc->bio_ref--;
1120 if (!dc->bio_ref && dc->state == D_SUBMIT) {
1121 dc->state = D_DONE;
1122 complete_all(&dc->wait);
1123 }
1124 spin_unlock_irqrestore(&dc->lock, flags);
1125 bio_put(bio);
1126 }
1127
1128 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1129 block_t start, block_t end)
1130 {
1131 #ifdef CONFIG_F2FS_CHECK_FS
1132 struct seg_entry *sentry;
1133 unsigned int segno;
1134 block_t blk = start;
1135 unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1136 unsigned long *map;
1137
1138 while (blk < end) {
1139 segno = GET_SEGNO(sbi, blk);
1140 sentry = get_seg_entry(sbi, segno);
1141 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1142
1143 if (end < START_BLOCK(sbi, segno + 1))
1144 size = GET_BLKOFF_FROM_SEG0(sbi, end);
1145 else
1146 size = max_blocks;
1147 map = (unsigned long *)(sentry->cur_valid_map);
1148 offset = __find_rev_next_bit(map, size, offset);
1149 f2fs_bug_on(sbi, offset != size);
1150 blk = START_BLOCK(sbi, segno + 1);
1151 }
1152 #endif
1153 }
1154
1155 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1156 struct discard_policy *dpolicy,
1157 int discard_type, unsigned int granularity)
1158 {
1159 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1160
1161 /* common policy */
1162 dpolicy->type = discard_type;
1163 dpolicy->sync = true;
1164 dpolicy->ordered = false;
1165 dpolicy->granularity = granularity;
1166
1167 dpolicy->max_requests = dcc->max_discard_request;
1168 dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1169 dpolicy->timeout = false;
1170
1171 if (discard_type == DPOLICY_BG) {
1172 dpolicy->min_interval = dcc->min_discard_issue_time;
1173 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1174 dpolicy->max_interval = dcc->max_discard_issue_time;
1175 dpolicy->io_aware = true;
1176 dpolicy->sync = false;
1177 dpolicy->ordered = true;
1178 if (utilization(sbi) > dcc->discard_urgent_util) {
1179 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1180 if (atomic_read(&dcc->discard_cmd_cnt))
1181 dpolicy->max_interval =
1182 dcc->min_discard_issue_time;
1183 }
1184 } else if (discard_type == DPOLICY_FORCE) {
1185 dpolicy->min_interval = dcc->min_discard_issue_time;
1186 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1187 dpolicy->max_interval = dcc->max_discard_issue_time;
1188 dpolicy->io_aware = false;
1189 } else if (discard_type == DPOLICY_FSTRIM) {
1190 dpolicy->io_aware = false;
1191 } else if (discard_type == DPOLICY_UMOUNT) {
1192 dpolicy->io_aware = false;
1193 /* we need to issue all to keep CP_TRIMMED_FLAG */
1194 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1195 dpolicy->timeout = true;
1196 }
1197 }
1198
1199 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1200 struct block_device *bdev, block_t lstart,
1201 block_t start, block_t len);
1202
1203 #ifdef CONFIG_BLK_DEV_ZONED
1204 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1205 struct discard_cmd *dc, blk_opf_t flag,
1206 struct list_head *wait_list,
1207 unsigned int *issued)
1208 {
1209 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1210 struct block_device *bdev = dc->bdev;
1211 struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1212 unsigned long flags;
1213
1214 trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1215
1216 spin_lock_irqsave(&dc->lock, flags);
1217 dc->state = D_SUBMIT;
1218 dc->bio_ref++;
1219 spin_unlock_irqrestore(&dc->lock, flags);
1220
1221 if (issued)
1222 (*issued)++;
1223
1224 atomic_inc(&dcc->queued_discard);
1225 dc->queued++;
1226 list_move_tail(&dc->list, wait_list);
1227
1228 /* sanity check on discard range */
1229 __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1230
1231 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1232 bio->bi_private = dc;
1233 bio->bi_end_io = f2fs_submit_discard_endio;
1234 submit_bio(bio);
1235
1236 atomic_inc(&dcc->issued_discard);
1237 f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1238 }
1239 #endif
1240
1241 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1242 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1243 struct discard_policy *dpolicy,
1244 struct discard_cmd *dc, int *issued)
1245 {
1246 struct block_device *bdev = dc->bdev;
1247 unsigned int max_discard_blocks =
1248 SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1249 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1250 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1251 &(dcc->fstrim_list) : &(dcc->wait_list);
1252 blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1253 block_t lstart, start, len, total_len;
1254 int err = 0;
1255
1256 if (dc->state != D_PREP)
1257 return 0;
1258
1259 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1260 return 0;
1261
1262 #ifdef CONFIG_BLK_DEV_ZONED
1263 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1264 int devi = f2fs_bdev_index(sbi, bdev);
1265
1266 if (devi < 0)
1267 return -EINVAL;
1268
1269 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1270 __submit_zone_reset_cmd(sbi, dc, flag,
1271 wait_list, issued);
1272 return 0;
1273 }
1274 }
1275 #endif
1276
1277 trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1278
1279 lstart = dc->di.lstart;
1280 start = dc->di.start;
1281 len = dc->di.len;
1282 total_len = len;
1283
1284 dc->di.len = 0;
1285
1286 while (total_len && *issued < dpolicy->max_requests && !err) {
1287 struct bio *bio = NULL;
1288 unsigned long flags;
1289 bool last = true;
1290
1291 if (len > max_discard_blocks) {
1292 len = max_discard_blocks;
1293 last = false;
1294 }
1295
1296 (*issued)++;
1297 if (*issued == dpolicy->max_requests)
1298 last = true;
1299
1300 dc->di.len += len;
1301
1302 if (time_to_inject(sbi, FAULT_DISCARD)) {
1303 err = -EIO;
1304 } else {
1305 err = __blkdev_issue_discard(bdev,
1306 SECTOR_FROM_BLOCK(start),
1307 SECTOR_FROM_BLOCK(len),
1308 GFP_NOFS, &bio);
1309 }
1310 if (err) {
1311 spin_lock_irqsave(&dc->lock, flags);
1312 if (dc->state == D_PARTIAL)
1313 dc->state = D_SUBMIT;
1314 spin_unlock_irqrestore(&dc->lock, flags);
1315
1316 break;
1317 }
1318
1319 f2fs_bug_on(sbi, !bio);
1320
1321 /*
1322 * keep this update before submission so the command is not
1323 * marked D_DONE right away
1324 */
1325 spin_lock_irqsave(&dc->lock, flags);
1326 if (last)
1327 dc->state = D_SUBMIT;
1328 else
1329 dc->state = D_PARTIAL;
1330 dc->bio_ref++;
1331 spin_unlock_irqrestore(&dc->lock, flags);
1332
1333 atomic_inc(&dcc->queued_discard);
1334 dc->queued++;
1335 list_move_tail(&dc->list, wait_list);
1336
1337 /* sanity check on discard range */
1338 __check_sit_bitmap(sbi, lstart, lstart + len);
1339
1340 bio->bi_private = dc;
1341 bio->bi_end_io = f2fs_submit_discard_endio;
1342 bio->bi_opf |= flag;
1343 submit_bio(bio);
1344
1345 atomic_inc(&dcc->issued_discard);
1346
1347 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1348
1349 lstart += len;
1350 start += len;
1351 total_len -= len;
1352 len = total_len;
1353 }
1354
1355 if (!err && len) {
1356 dcc->undiscard_blks -= len;
1357 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1358 }
1359 return err;
1360 }
1361
1362 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1363 struct block_device *bdev, block_t lstart,
1364 block_t start, block_t len)
1365 {
1366 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1367 struct rb_node **p = &dcc->root.rb_root.rb_node;
1368 struct rb_node *parent = NULL;
1369 struct discard_cmd *dc;
1370 bool leftmost = true;
1371
1372 /* look up rb tree to find parent node */
1373 while (*p) {
1374 parent = *p;
1375 dc = rb_entry(parent, struct discard_cmd, rb_node);
1376
1377 if (lstart < dc->di.lstart) {
1378 p = &(*p)->rb_left;
1379 } else if (lstart >= dc->di.lstart + dc->di.len) {
1380 p = &(*p)->rb_right;
1381 leftmost = false;
1382 } else {
1383 f2fs_bug_on(sbi, 1);
1384 }
1385 }
1386
1387 dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1388
1389 rb_link_node(&dc->rb_node, parent, p);
1390 rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1391 }
1392
1393 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1394 struct discard_cmd *dc)
1395 {
1396 list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1397 }
1398
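/*
 * Drop @blkaddr from an existing discard command: remove the command
 * outright if it is done or one block long, otherwise trim or split it so
 * the block is no longer covered.
 */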
1399 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1400 struct discard_cmd *dc, block_t blkaddr)
1401 {
1402 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1403 struct discard_info di = dc->di;
1404 bool modified = false;
1405
1406 if (dc->state == D_DONE || dc->di.len == 1) {
1407 __remove_discard_cmd(sbi, dc);
1408 return;
1409 }
1410
1411 dcc->undiscard_blks -= di.len;
1412
1413 if (blkaddr > di.lstart) {
1414 dc->di.len = blkaddr - dc->di.lstart;
1415 dcc->undiscard_blks += dc->di.len;
1416 __relocate_discard_cmd(dcc, dc);
1417 modified = true;
1418 }
1419
1420 if (blkaddr < di.lstart + di.len - 1) {
1421 if (modified) {
1422 __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1423 di.start + blkaddr + 1 - di.lstart,
1424 di.lstart + di.len - 1 - blkaddr);
1425 } else {
1426 dc->di.lstart++;
1427 dc->di.len--;
1428 dc->di.start++;
1429 dcc->undiscard_blks += dc->di.len;
1430 __relocate_discard_cmd(dcc, dc);
1431 }
1432 }
1433 }
1434
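/*
 * Merge the given range into the discard rb-tree, extending mergeable
 * neighbouring commands on the same device or inserting new commands for
 * the uncovered gaps.
 */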
1435 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1436 struct block_device *bdev, block_t lstart,
1437 block_t start, block_t len)
1438 {
1439 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1440 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1441 struct discard_cmd *dc;
1442 struct discard_info di = {0};
1443 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1444 unsigned int max_discard_blocks =
1445 SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1446 block_t end = lstart + len;
1447
1448 dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1449 &prev_dc, &next_dc, &insert_p, &insert_parent);
1450 if (dc)
1451 prev_dc = dc;
1452
1453 if (!prev_dc) {
1454 di.lstart = lstart;
1455 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1456 di.len = min(di.len, len);
1457 di.start = start;
1458 }
1459
1460 while (1) {
1461 struct rb_node *node;
1462 bool merged = false;
1463 struct discard_cmd *tdc = NULL;
1464
1465 if (prev_dc) {
1466 di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1467 if (di.lstart < lstart)
1468 di.lstart = lstart;
1469 if (di.lstart >= end)
1470 break;
1471
1472 if (!next_dc || next_dc->di.lstart > end)
1473 di.len = end - di.lstart;
1474 else
1475 di.len = next_dc->di.lstart - di.lstart;
1476 di.start = start + di.lstart - lstart;
1477 }
1478
1479 if (!di.len)
1480 goto next;
1481
1482 if (prev_dc && prev_dc->state == D_PREP &&
1483 prev_dc->bdev == bdev &&
1484 __is_discard_back_mergeable(&di, &prev_dc->di,
1485 max_discard_blocks)) {
1486 prev_dc->di.len += di.len;
1487 dcc->undiscard_blks += di.len;
1488 __relocate_discard_cmd(dcc, prev_dc);
1489 di = prev_dc->di;
1490 tdc = prev_dc;
1491 merged = true;
1492 }
1493
1494 if (next_dc && next_dc->state == D_PREP &&
1495 next_dc->bdev == bdev &&
1496 __is_discard_front_mergeable(&di, &next_dc->di,
1497 max_discard_blocks)) {
1498 next_dc->di.lstart = di.lstart;
1499 next_dc->di.len += di.len;
1500 next_dc->di.start = di.start;
1501 dcc->undiscard_blks += di.len;
1502 __relocate_discard_cmd(dcc, next_dc);
1503 if (tdc)
1504 __remove_discard_cmd(sbi, tdc);
1505 merged = true;
1506 }
1507
1508 if (!merged)
1509 __insert_discard_cmd(sbi, bdev,
1510 di.lstart, di.start, di.len);
1511 next:
1512 prev_dc = next_dc;
1513 if (!prev_dc)
1514 break;
1515
1516 node = rb_next(&prev_dc->rb_node);
1517 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1518 }
1519 }
1520
1521 #ifdef CONFIG_BLK_DEV_ZONED
1522 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1523 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1524 block_t blklen)
1525 {
1526 trace_f2fs_queue_reset_zone(bdev, blkstart);
1527
1528 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1529 __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1530 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1531 }
1532 #endif
1533
1534 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1535 struct block_device *bdev, block_t blkstart, block_t blklen)
1536 {
1537 block_t lblkstart = blkstart;
1538
1539 if (!f2fs_bdev_support_discard(bdev))
1540 return;
1541
1542 trace_f2fs_queue_discard(bdev, blkstart, blklen);
1543
1544 if (f2fs_is_multi_device(sbi)) {
1545 int devi = f2fs_target_device_index(sbi, blkstart);
1546
1547 blkstart -= FDEV(devi).start_blk;
1548 }
1549 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1550 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1551 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1552 }
1553
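/*
 * Issue pending discard commands in LBA order, resuming from the position
 * remembered in dcc->next_pos, until the request budget is spent or IO
 * becomes busy.
 */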
1554 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1555 struct discard_policy *dpolicy, int *issued)
1556 {
1557 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1558 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1559 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1560 struct discard_cmd *dc;
1561 struct blk_plug plug;
1562 bool io_interrupted = false;
1563
1564 mutex_lock(&dcc->cmd_lock);
1565 dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1566 &prev_dc, &next_dc, &insert_p, &insert_parent);
1567 if (!dc)
1568 dc = next_dc;
1569
1570 blk_start_plug(&plug);
1571
1572 while (dc) {
1573 struct rb_node *node;
1574 int err = 0;
1575
1576 if (dc->state != D_PREP)
1577 goto next;
1578
1579 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1580 io_interrupted = true;
1581 break;
1582 }
1583
1584 dcc->next_pos = dc->di.lstart + dc->di.len;
1585 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1586
1587 if (*issued >= dpolicy->max_requests)
1588 break;
1589 next:
1590 node = rb_next(&dc->rb_node);
1591 if (err)
1592 __remove_discard_cmd(sbi, dc);
1593 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1594 }
1595
1596 blk_finish_plug(&plug);
1597
1598 if (!dc)
1599 dcc->next_pos = 0;
1600
1601 mutex_unlock(&dcc->cmd_lock);
1602
1603 if (!(*issued) && io_interrupted)
1604 *issued = -1;
1605 }
1606 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1607 struct discard_policy *dpolicy);
1608
1609 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1610 struct discard_policy *dpolicy)
1611 {
1612 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1613 struct list_head *pend_list;
1614 struct discard_cmd *dc, *tmp;
1615 struct blk_plug plug;
1616 int i, issued;
1617 bool io_interrupted = false;
1618
1619 if (dpolicy->timeout)
1620 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1621
1622 retry:
1623 issued = 0;
1624 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1625 if (dpolicy->timeout &&
1626 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1627 break;
1628
1629 if (i + 1 < dpolicy->granularity)
1630 break;
1631
1632 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1633 __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1634 return issued;
1635 }
1636
1637 pend_list = &dcc->pend_list[i];
1638
1639 mutex_lock(&dcc->cmd_lock);
1640 if (list_empty(pend_list))
1641 goto next;
1642 if (unlikely(dcc->rbtree_check))
1643 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1644 blk_start_plug(&plug);
1645 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1646 f2fs_bug_on(sbi, dc->state != D_PREP);
1647
1648 if (dpolicy->timeout &&
1649 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1650 break;
1651
1652 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1653 !is_idle(sbi, DISCARD_TIME)) {
1654 io_interrupted = true;
1655 break;
1656 }
1657
1658 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1659
1660 if (issued >= dpolicy->max_requests)
1661 break;
1662 }
1663 blk_finish_plug(&plug);
1664 next:
1665 mutex_unlock(&dcc->cmd_lock);
1666
1667 if (issued >= dpolicy->max_requests || io_interrupted)
1668 break;
1669 }
1670
1671 if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1672 __wait_all_discard_cmd(sbi, dpolicy);
1673 goto retry;
1674 }
1675
1676 if (!issued && io_interrupted)
1677 issued = -1;
1678
1679 return issued;
1680 }
1681
1682 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1683 {
1684 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1685 struct list_head *pend_list;
1686 struct discard_cmd *dc, *tmp;
1687 int i;
1688 bool dropped = false;
1689
1690 mutex_lock(&dcc->cmd_lock);
1691 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1692 pend_list = &dcc->pend_list[i];
1693 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1694 f2fs_bug_on(sbi, dc->state != D_PREP);
1695 __remove_discard_cmd(sbi, dc);
1696 dropped = true;
1697 }
1698 }
1699 mutex_unlock(&dcc->cmd_lock);
1700
1701 return dropped;
1702 }
1703
1704 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1705 {
1706 __drop_discard_cmd(sbi);
1707 }
1708
1709 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1710 struct discard_cmd *dc)
1711 {
1712 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1713 unsigned int len = 0;
1714
1715 wait_for_completion_io(&dc->wait);
1716 mutex_lock(&dcc->cmd_lock);
1717 f2fs_bug_on(sbi, dc->state != D_DONE);
1718 dc->ref--;
1719 if (!dc->ref) {
1720 if (!dc->error)
1721 len = dc->di.len;
1722 __remove_discard_cmd(sbi, dc);
1723 }
1724 mutex_unlock(&dcc->cmd_lock);
1725
1726 return len;
1727 }
1728
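/*
 * Wait for submitted discard commands that overlap [@start, @end) and meet
 * the policy granularity, returning the number of blocks actually trimmed.
 */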
1729 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1730 struct discard_policy *dpolicy,
1731 block_t start, block_t end)
1732 {
1733 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1734 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1735 &(dcc->fstrim_list) : &(dcc->wait_list);
1736 struct discard_cmd *dc = NULL, *iter, *tmp;
1737 unsigned int trimmed = 0;
1738
1739 next:
1740 dc = NULL;
1741
1742 mutex_lock(&dcc->cmd_lock);
1743 list_for_each_entry_safe(iter, tmp, wait_list, list) {
1744 if (iter->di.lstart + iter->di.len <= start ||
1745 end <= iter->di.lstart)
1746 continue;
1747 if (iter->di.len < dpolicy->granularity)
1748 continue;
1749 if (iter->state == D_DONE && !iter->ref) {
1750 wait_for_completion_io(&iter->wait);
1751 if (!iter->error)
1752 trimmed += iter->di.len;
1753 __remove_discard_cmd(sbi, iter);
1754 } else {
1755 iter->ref++;
1756 dc = iter;
1757 break;
1758 }
1759 }
1760 mutex_unlock(&dcc->cmd_lock);
1761
1762 if (dc) {
1763 trimmed += __wait_one_discard_bio(sbi, dc);
1764 goto next;
1765 }
1766
1767 return trimmed;
1768 }
1769
1770 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1771 struct discard_policy *dpolicy)
1772 {
1773 struct discard_policy dp;
1774 unsigned int discard_blks;
1775
1776 if (dpolicy)
1777 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1778
1779 /* wait all */
1780 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1781 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1782 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1783 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1784
1785 return discard_blks;
1786 }
1787
1788 /* This should be covered by global mutex, &sit_i->sentry_lock */
1789 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1790 {
1791 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1792 struct discard_cmd *dc;
1793 bool need_wait = false;
1794
1795 mutex_lock(&dcc->cmd_lock);
1796 dc = __lookup_discard_cmd(sbi, blkaddr);
1797 #ifdef CONFIG_BLK_DEV_ZONED
1798 if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1799 int devi = f2fs_bdev_index(sbi, dc->bdev);
1800
1801 if (devi < 0) {
1802 mutex_unlock(&dcc->cmd_lock);
1803 return;
1804 }
1805
1806 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1807 /* force submit zone reset */
1808 if (dc->state == D_PREP)
1809 __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1810 &dcc->wait_list, NULL);
1811 dc->ref++;
1812 mutex_unlock(&dcc->cmd_lock);
1813 /* wait zone reset */
1814 __wait_one_discard_bio(sbi, dc);
1815 return;
1816 }
1817 }
1818 #endif
1819 if (dc) {
1820 if (dc->state == D_PREP) {
1821 __punch_discard_cmd(sbi, dc, blkaddr);
1822 } else {
1823 dc->ref++;
1824 need_wait = true;
1825 }
1826 }
1827 mutex_unlock(&dcc->cmd_lock);
1828
1829 if (need_wait)
1830 __wait_one_discard_bio(sbi, dc);
1831 }
1832
1833 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1834 {
1835 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1836
1837 if (dcc && dcc->f2fs_issue_discard) {
1838 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1839
1840 dcc->f2fs_issue_discard = NULL;
1841 kthread_stop(discard_thread);
1842 }
1843 }
1844
1845 /**
1846 * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1847 * @sbi: the f2fs_sb_info data for discard cmd to issue
1848 *
1849 * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped
1850 *
1851 * Return true if all discard commands were issued or none needed issuing; otherwise return false.
1852 */
1853 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1854 {
1855 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1856 struct discard_policy dpolicy;
1857 bool dropped;
1858
1859 if (!atomic_read(&dcc->discard_cmd_cnt))
1860 return true;
1861
1862 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1863 dcc->discard_granularity);
1864 __issue_discard_cmd(sbi, &dpolicy);
1865 dropped = __drop_discard_cmd(sbi);
1866
1867 /* just to make sure there are no pending discard commands */
1868 __wait_all_discard_cmd(sbi, NULL);
1869
1870 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1871 return !dropped;
1872 }
1873
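/*
 * Background thread that periodically issues pending discard commands,
 * switching to a more aggressive policy under urgent-high GC or memory
 * pressure and adjusting its sleep interval to the issue result.
 */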
1874 static int issue_discard_thread(void *data)
1875 {
1876 struct f2fs_sb_info *sbi = data;
1877 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1878 wait_queue_head_t *q = &dcc->discard_wait_queue;
1879 struct discard_policy dpolicy;
1880 unsigned int wait_ms = dcc->min_discard_issue_time;
1881 int issued;
1882
1883 set_freezable();
1884
1885 do {
1886 wait_event_interruptible_timeout(*q,
1887 kthread_should_stop() || freezing(current) ||
1888 dcc->discard_wake,
1889 msecs_to_jiffies(wait_ms));
1890
1891 if (sbi->gc_mode == GC_URGENT_HIGH ||
1892 !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1893 __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1894 MIN_DISCARD_GRANULARITY);
1895 else
1896 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1897 dcc->discard_granularity);
1898
1899 if (dcc->discard_wake)
1900 dcc->discard_wake = false;
1901
1902 /* clean up pending candidates before going to sleep */
1903 if (atomic_read(&dcc->queued_discard))
1904 __wait_all_discard_cmd(sbi, NULL);
1905
1906 if (try_to_freeze())
1907 continue;
1908 if (f2fs_readonly(sbi->sb))
1909 continue;
1910 if (kthread_should_stop())
1911 return 0;
1912 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1913 !atomic_read(&dcc->discard_cmd_cnt)) {
1914 wait_ms = dpolicy.max_interval;
1915 continue;
1916 }
1917
1918 sb_start_intwrite(sbi->sb);
1919
1920 issued = __issue_discard_cmd(sbi, &dpolicy);
1921 if (issued > 0) {
1922 __wait_all_discard_cmd(sbi, &dpolicy);
1923 wait_ms = dpolicy.min_interval;
1924 } else if (issued == -1) {
1925 wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1926 if (!wait_ms)
1927 wait_ms = dpolicy.mid_interval;
1928 } else {
1929 wait_ms = dpolicy.max_interval;
1930 }
1931 if (!atomic_read(&dcc->discard_cmd_cnt))
1932 wait_ms = dpolicy.max_interval;
1933
1934 sb_end_intwrite(sbi->sb);
1935
1936 } while (!kthread_should_stop());
1937 return 0;
1938 }
1939
1940 #ifdef CONFIG_BLK_DEV_ZONED
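/*
 * On zoned block devices, discarding a whole sequential zone is turned
 * into a zone reset (submitted synchronously during recovery, queued
 * otherwise); conventional zones fall back to a regular queued discard.
 */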
1941 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1942 struct block_device *bdev, block_t blkstart, block_t blklen)
1943 {
1944 sector_t sector, nr_sects;
1945 block_t lblkstart = blkstart;
1946 int devi = 0;
1947 u64 remainder = 0;
1948
1949 if (f2fs_is_multi_device(sbi)) {
1950 devi = f2fs_target_device_index(sbi, blkstart);
1951 if (blkstart < FDEV(devi).start_blk ||
1952 blkstart > FDEV(devi).end_blk) {
1953 f2fs_err(sbi, "Invalid block %x", blkstart);
1954 return -EIO;
1955 }
1956 blkstart -= FDEV(devi).start_blk;
1957 }
1958
1959 /* For sequential zones, reset the zone write pointer */
1960 if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1961 sector = SECTOR_FROM_BLOCK(blkstart);
1962 nr_sects = SECTOR_FROM_BLOCK(blklen);
1963 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1964
1965 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1966 f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1967 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1968 blkstart, blklen);
1969 return -EIO;
1970 }
1971
1972 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1973 trace_f2fs_issue_reset_zone(bdev, blkstart);
1974 return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1975 sector, nr_sects, GFP_NOFS);
1976 }
1977
1978 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
1979 return 0;
1980 }
1981
1982 /* For conventional zones, use regular discard if supported */
1983 __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1984 return 0;
1985 }
1986 #endif
1987
1988 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1989 struct block_device *bdev, block_t blkstart, block_t blklen)
1990 {
1991 #ifdef CONFIG_BLK_DEV_ZONED
1992 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1993 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1994 #endif
1995 __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1996 return 0;
1997 }
1998
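/*
 * Split [blkstart, blkstart + blklen) at device boundaries, queue one
 * discard per device, and update the per-segment discard bitmap
 * accounting for every block in the range.
 */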
1999 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2000 block_t blkstart, block_t blklen)
2001 {
2002 sector_t start = blkstart, len = 0;
2003 struct block_device *bdev;
2004 struct seg_entry *se;
2005 unsigned int offset;
2006 block_t i;
2007 int err = 0;
2008
2009 bdev = f2fs_target_device(sbi, blkstart, NULL);
2010
2011 for (i = blkstart; i < blkstart + blklen; i++, len++) {
2012 if (i != start) {
2013 struct block_device *bdev2 =
2014 f2fs_target_device(sbi, i, NULL);
2015
2016 if (bdev2 != bdev) {
2017 err = __issue_discard_async(sbi, bdev,
2018 start, len);
2019 if (err)
2020 return err;
2021 bdev = bdev2;
2022 start = i;
2023 len = 0;
2024 }
2025 }
2026
2027 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2028 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2029
2030 if (f2fs_block_unit_discard(sbi) &&
2031 !f2fs_test_and_set_bit(offset, se->discard_map))
2032 sbi->discard_blks--;
2033 }
2034
2035 if (len)
2036 err = __issue_discard_async(sbi, bdev, start, len);
2037 return err;
2038 }
2039
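/*
 * Scan one segment's bitmaps for blocks that became invalid since the
 * last checkpoint and record them as small-discard candidates; with
 * @check_only set, only report whether such a candidate exists.
 */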
2040 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2041 bool check_only)
2042 {
2043 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2044 int max_blocks = sbi->blocks_per_seg;
2045 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2046 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2047 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2048 unsigned long *discard_map = (unsigned long *)se->discard_map;
2049 unsigned long *dmap = SIT_I(sbi)->tmp_map;
2050 unsigned int start = 0, end = -1;
2051 bool force = (cpc->reason & CP_DISCARD);
2052 struct discard_entry *de = NULL;
2053 struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2054 int i;
2055
2056 if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
2057 !f2fs_block_unit_discard(sbi))
2058 return false;
2059
2060 if (!force) {
2061 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2062 SM_I(sbi)->dcc_info->nr_discards >=
2063 SM_I(sbi)->dcc_info->max_discards)
2064 return false;
2065 }
2066
2067 /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2068 for (i = 0; i < entries; i++)
2069 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2070 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2071
2072 while (force || SM_I(sbi)->dcc_info->nr_discards <=
2073 SM_I(sbi)->dcc_info->max_discards) {
2074 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
2075 if (start >= max_blocks)
2076 break;
2077
2078 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
2079 if (force && start && end != max_blocks
2080 && (end - start) < cpc->trim_minlen)
2081 continue;
2082
2083 if (check_only)
2084 return true;
2085
2086 if (!de) {
2087 de = f2fs_kmem_cache_alloc(discard_entry_slab,
2088 GFP_F2FS_ZERO, true, NULL);
2089 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2090 list_add_tail(&de->list, head);
2091 }
2092
2093 for (i = start; i < end; i++)
2094 __set_bit_le(i, (void *)de->discard_map);
2095
2096 SM_I(sbi)->dcc_info->nr_discards += end - start;
2097 }
2098 return false;
2099 }
2100
2101 static void release_discard_addr(struct discard_entry *entry)
2102 {
2103 list_del(&entry->list);
2104 kmem_cache_free(discard_entry_slab, entry);
2105 }
2106
2107 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2108 {
2109 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2110 struct discard_entry *entry, *this;
2111
2112 /* drop caches */
2113 list_for_each_entry_safe(entry, this, head, list)
2114 release_discard_addr(entry);
2115 }
2116
2117 /*
2118 * Should call f2fs_clear_prefree_segments after checkpoint is done.
2119 */
2120 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2121 {
2122 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2123 unsigned int segno;
2124
2125 mutex_lock(&dirty_i->seglist_lock);
2126 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2127 __set_test_and_free(sbi, segno, false);
2128 mutex_unlock(&dirty_i->seglist_lock);
2129 }
2130
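/*
 * Clear prefree state for segments freed by the last checkpoint, issue
 * discards for the freed ranges, and flush the small-discard candidates
 * collected by add_discard_addrs().
 */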
2131 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2132 struct cp_control *cpc)
2133 {
2134 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2135 struct list_head *head = &dcc->entry_list;
2136 struct discard_entry *entry, *this;
2137 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2138 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2139 unsigned int start = 0, end = -1;
2140 unsigned int secno, start_segno;
2141 bool force = (cpc->reason & CP_DISCARD);
2142 bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2143 DISCARD_UNIT_SECTION;
2144
2145 if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2146 section_alignment = true;
2147
2148 mutex_lock(&dirty_i->seglist_lock);
2149
2150 while (1) {
2151 int i;
2152
2153 if (section_alignment && end != -1)
2154 end--;
2155 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2156 if (start >= MAIN_SEGS(sbi))
2157 break;
2158 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2159 start + 1);
2160
2161 if (section_alignment) {
2162 start = rounddown(start, sbi->segs_per_sec);
2163 end = roundup(end, sbi->segs_per_sec);
2164 }
2165
2166 for (i = start; i < end; i++) {
2167 if (test_and_clear_bit(i, prefree_map))
2168 dirty_i->nr_dirty[PRE]--;
2169 }
2170
2171 if (!f2fs_realtime_discard_enable(sbi))
2172 continue;
2173
2174 if (force && start >= cpc->trim_start &&
2175 (end - 1) <= cpc->trim_end)
2176 continue;
2177
2178 /* Should cover 2MB zoned device for zone-based reset */
2179 if (!f2fs_sb_has_blkzoned(sbi) &&
2180 (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2181 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2182 (end - start) << sbi->log_blocks_per_seg);
2183 continue;
2184 }
2185 next:
2186 secno = GET_SEC_FROM_SEG(sbi, start);
2187 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2188 if (!IS_CURSEC(sbi, secno) &&
2189 !get_valid_blocks(sbi, start, true))
2190 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2191 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2192
2193 start = start_segno + sbi->segs_per_sec;
2194 if (start < end)
2195 goto next;
2196 else
2197 end = start - 1;
2198 }
2199 mutex_unlock(&dirty_i->seglist_lock);
2200
2201 if (!f2fs_block_unit_discard(sbi))
2202 goto wakeup;
2203
2204 /* send small discards */
2205 list_for_each_entry_safe(entry, this, head, list) {
2206 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2207 bool is_valid = test_bit_le(0, entry->discard_map);
2208
2209 find_next:
2210 if (is_valid) {
2211 next_pos = find_next_zero_bit_le(entry->discard_map,
2212 sbi->blocks_per_seg, cur_pos);
2213 len = next_pos - cur_pos;
2214
2215 if (f2fs_sb_has_blkzoned(sbi) ||
2216 (force && len < cpc->trim_minlen))
2217 goto skip;
2218
2219 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2220 len);
2221 total_len += len;
2222 } else {
2223 next_pos = find_next_bit_le(entry->discard_map,
2224 sbi->blocks_per_seg, cur_pos);
2225 }
2226 skip:
2227 cur_pos = next_pos;
2228 is_valid = !is_valid;
2229
2230 if (cur_pos < sbi->blocks_per_seg)
2231 goto find_next;
2232
2233 release_discard_addr(entry);
2234 dcc->nr_discards -= total_len;
2235 }
2236
2237 wakeup:
2238 wake_up_discard_thread(sbi, false);
2239 }
2240
2241 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2242 {
2243 dev_t dev = sbi->sb->s_bdev->bd_dev;
2244 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2245 int err = 0;
2246
2247 if (!f2fs_realtime_discard_enable(sbi))
2248 return 0;
2249
2250 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2251 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2252 if (IS_ERR(dcc->f2fs_issue_discard)) {
2253 err = PTR_ERR(dcc->f2fs_issue_discard);
2254 dcc->f2fs_issue_discard = NULL;
2255 }
2256
2257 return err;
2258 }
2259
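/*
 * Allocate and initialize the discard command control structure (or
 * reuse an existing one) and start the background discard thread.
 */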
2260 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2261 {
2262 struct discard_cmd_control *dcc;
2263 int err = 0, i;
2264
2265 if (SM_I(sbi)->dcc_info) {
2266 dcc = SM_I(sbi)->dcc_info;
2267 goto init_thread;
2268 }
2269
2270 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2271 if (!dcc)
2272 return -ENOMEM;
2273
2274 dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2275 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2276 dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2277 if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2278 dcc->discard_granularity = sbi->blocks_per_seg;
2279 else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2280 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2281
2282 INIT_LIST_HEAD(&dcc->entry_list);
2283 for (i = 0; i < MAX_PLIST_NUM; i++)
2284 INIT_LIST_HEAD(&dcc->pend_list[i]);
2285 INIT_LIST_HEAD(&dcc->wait_list);
2286 INIT_LIST_HEAD(&dcc->fstrim_list);
2287 mutex_init(&dcc->cmd_lock);
2288 atomic_set(&dcc->issued_discard, 0);
2289 atomic_set(&dcc->queued_discard, 0);
2290 atomic_set(&dcc->discard_cmd_cnt, 0);
2291 dcc->nr_discards = 0;
2292 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2293 dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2294 dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2295 dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2296 dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2297 dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2298 dcc->undiscard_blks = 0;
2299 dcc->next_pos = 0;
2300 dcc->root = RB_ROOT_CACHED;
2301 dcc->rbtree_check = false;
2302
2303 init_waitqueue_head(&dcc->discard_wait_queue);
2304 SM_I(sbi)->dcc_info = dcc;
2305 init_thread:
2306 err = f2fs_start_discard_thread(sbi);
2307 if (err) {
2308 kfree(dcc);
2309 SM_I(sbi)->dcc_info = NULL;
2310 }
2311
2312 return err;
2313 }
2314
2315 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2316 {
2317 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2318
2319 if (!dcc)
2320 return;
2321
2322 f2fs_stop_discard_thread(sbi);
2323
2324 /*
2325 * Recovery can cache discard commands, so in error path of
2326 * fill_super(), it needs to give a chance to handle them.
2327 */
2328 f2fs_issue_discard_timeout(sbi);
2329
2330 kfree(dcc);
2331 SM_I(sbi)->dcc_info = NULL;
2332 }
2333
2334 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2335 {
2336 struct sit_info *sit_i = SIT_I(sbi);
2337
2338 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2339 sit_i->dirty_sentries++;
2340 return false;
2341 }
2342
2343 return true;
2344 }
2345
2346 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2347 unsigned int segno, int modified)
2348 {
2349 struct seg_entry *se = get_seg_entry(sbi, segno);
2350
2351 se->type = type;
2352 if (modified)
2353 __mark_sit_entry_dirty(sbi, segno);
2354 }
2355
2356 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2357 block_t blkaddr)
2358 {
2359 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2360
2361 if (segno == NULL_SEGNO)
2362 return 0;
2363 return get_seg_entry(sbi, segno)->mtime;
2364 }
2365
2366 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2367 unsigned long long old_mtime)
2368 {
2369 struct seg_entry *se;
2370 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2371 unsigned long long ctime = get_mtime(sbi, false);
2372 unsigned long long mtime = old_mtime ? old_mtime : ctime;
2373
2374 if (segno == NULL_SEGNO)
2375 return;
2376
2377 se = get_seg_entry(sbi, segno);
2378
2379 if (!se->mtime)
2380 se->mtime = mtime;
2381 else
2382 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2383 se->valid_blocks + 1);
2384
2385 if (ctime > SIT_I(sbi)->max_mtime)
2386 SIT_I(sbi)->max_mtime = ctime;
2387 }
2388
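/*
 * Apply a valid-block delta for @blkaddr to its SIT entry: adjust the
 * valid/ckpt/discard bitmaps and counters consistently and mark the
 * entry dirty so it is written out at the next checkpoint.
 */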
2389 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2390 {
2391 struct seg_entry *se;
2392 unsigned int segno, offset;
2393 long int new_vblocks;
2394 bool exist;
2395 #ifdef CONFIG_F2FS_CHECK_FS
2396 bool mir_exist;
2397 #endif
2398
2399 segno = GET_SEGNO(sbi, blkaddr);
2400
2401 se = get_seg_entry(sbi, segno);
2402 new_vblocks = se->valid_blocks + del;
2403 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2404
2405 f2fs_bug_on(sbi, (new_vblocks < 0 ||
2406 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2407
2408 se->valid_blocks = new_vblocks;
2409
2410 /* Update valid block bitmap */
2411 if (del > 0) {
2412 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2413 #ifdef CONFIG_F2FS_CHECK_FS
2414 mir_exist = f2fs_test_and_set_bit(offset,
2415 se->cur_valid_map_mir);
2416 if (unlikely(exist != mir_exist)) {
2417 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2418 blkaddr, exist);
2419 f2fs_bug_on(sbi, 1);
2420 }
2421 #endif
2422 if (unlikely(exist)) {
2423 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2424 blkaddr);
2425 f2fs_bug_on(sbi, 1);
2426 se->valid_blocks--;
2427 del = 0;
2428 }
2429
2430 if (f2fs_block_unit_discard(sbi) &&
2431 !f2fs_test_and_set_bit(offset, se->discard_map))
2432 sbi->discard_blks--;
2433
2434 /*
2435 * SSR should never reuse a block which is checkpointed
2436 * or newly invalidated.
2437 */
2438 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2439 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2440 se->ckpt_valid_blocks++;
2441 }
2442 } else {
2443 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2444 #ifdef CONFIG_F2FS_CHECK_FS
2445 mir_exist = f2fs_test_and_clear_bit(offset,
2446 se->cur_valid_map_mir);
2447 if (unlikely(exist != mir_exist)) {
2448 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2449 blkaddr, exist);
2450 f2fs_bug_on(sbi, 1);
2451 }
2452 #endif
2453 if (unlikely(!exist)) {
2454 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2455 blkaddr);
2456 f2fs_bug_on(sbi, 1);
2457 se->valid_blocks++;
2458 del = 0;
2459 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2460 /*
2461 * If checkpoints are off, we must not reuse data that
2462 * was used in the previous checkpoint. If it was used
2463 * before, we must track that to know how much space we
2464 * really have.
2465 */
2466 if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2467 spin_lock(&sbi->stat_lock);
2468 sbi->unusable_block_count++;
2469 spin_unlock(&sbi->stat_lock);
2470 }
2471 }
2472
2473 if (f2fs_block_unit_discard(sbi) &&
2474 f2fs_test_and_clear_bit(offset, se->discard_map))
2475 sbi->discard_blks++;
2476 }
2477 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2478 se->ckpt_valid_blocks += del;
2479
2480 __mark_sit_entry_dirty(sbi, segno);
2481
2482 /* update total number of valid blocks to be written in ckpt area */
2483 SIT_I(sbi)->written_valid_blocks += del;
2484
2485 if (__is_large_section(sbi))
2486 get_sec_entry(sbi, segno)->valid_blocks += del;
2487 }
2488
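/*
 * Invalidate a single block: drop it from internal caches, decrease the
 * valid-block count of its segment, and mark that segment dirty.
 */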
2489 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2490 {
2491 unsigned int segno = GET_SEGNO(sbi, addr);
2492 struct sit_info *sit_i = SIT_I(sbi);
2493
2494 f2fs_bug_on(sbi, addr == NULL_ADDR);
2495 if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2496 return;
2497
2498 f2fs_invalidate_internal_cache(sbi, addr);
2499
2500 /* add it into sit main buffer */
2501 down_write(&sit_i->sentry_lock);
2502
2503 update_segment_mtime(sbi, addr, 0);
2504 update_sit_entry(sbi, addr, -1);
2505
2506 /* add it into dirty seglist */
2507 locate_dirty_segment(sbi, segno);
2508
2509 up_write(&sit_i->sentry_lock);
2510 }
2511
2512 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2513 {
2514 struct sit_info *sit_i = SIT_I(sbi);
2515 unsigned int segno, offset;
2516 struct seg_entry *se;
2517 bool is_cp = false;
2518
2519 if (!__is_valid_data_blkaddr(blkaddr))
2520 return true;
2521
2522 down_read(&sit_i->sentry_lock);
2523
2524 segno = GET_SEGNO(sbi, blkaddr);
2525 se = get_seg_entry(sbi, segno);
2526 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2527
2528 if (f2fs_test_bit(offset, se->ckpt_valid_map))
2529 is_cp = true;
2530
2531 up_read(&sit_i->sentry_lock);
2532
2533 return is_cp;
2534 }
2535
2536 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2537 {
2538 struct curseg_info *curseg = CURSEG_I(sbi, type);
2539
2540 if (sbi->ckpt->alloc_type[type] == SSR)
2541 return sbi->blocks_per_seg;
2542 return curseg->next_blkoff;
2543 }
2544
2545 /*
2546 * Calculate the number of current summary pages for writing
2547 */
2548 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2549 {
2550 int valid_sum_count = 0;
2551 int i, sum_in_page;
2552
2553 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2554 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2555 valid_sum_count +=
2556 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2557 else
2558 valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2559 }
2560
2561 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2562 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2563 if (valid_sum_count <= sum_in_page)
2564 return 1;
2565 else if ((valid_sum_count - sum_in_page) <=
2566 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2567 return 2;
2568 return 3;
2569 }
2570
2571 /*
2572 * Caller should put this summary page
2573 */
2574 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2575 {
2576 if (unlikely(f2fs_cp_error(sbi)))
2577 return ERR_PTR(-EIO);
2578 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2579 }
2580
2581 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2582 void *src, block_t blk_addr)
2583 {
2584 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2585
2586 memcpy(page_address(page), src, PAGE_SIZE);
2587 set_page_dirty(page);
2588 f2fs_put_page(page, 1);
2589 }
2590
2591 static void write_sum_page(struct f2fs_sb_info *sbi,
2592 struct f2fs_summary_block *sum_blk, block_t blk_addr)
2593 {
2594 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2595 }
2596
2597 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2598 int type, block_t blk_addr)
2599 {
2600 struct curseg_info *curseg = CURSEG_I(sbi, type);
2601 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2602 struct f2fs_summary_block *src = curseg->sum_blk;
2603 struct f2fs_summary_block *dst;
2604
2605 dst = (struct f2fs_summary_block *)page_address(page);
2606 memset(dst, 0, PAGE_SIZE);
2607
2608 mutex_lock(&curseg->curseg_mutex);
2609
2610 down_read(&curseg->journal_rwsem);
2611 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2612 up_read(&curseg->journal_rwsem);
2613
2614 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2615 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2616
2617 mutex_unlock(&curseg->curseg_mutex);
2618
2619 set_page_dirty(page);
2620 f2fs_put_page(page, 1);
2621 }
2622
2623 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2624 struct curseg_info *curseg, int type)
2625 {
2626 unsigned int segno = curseg->segno + 1;
2627 struct free_segmap_info *free_i = FREE_I(sbi);
2628
2629 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2630 return !test_bit(segno, free_i->free_segmap);
2631 return 0;
2632 }
2633
2634 /*
2635 * Find a new segment in the free segment bitmap, following the requested allocation order.
2636 * This function must always succeed; failing to find a segment is a BUG.
2637 */
2638 static void get_new_segment(struct f2fs_sb_info *sbi,
2639 unsigned int *newseg, bool new_sec, int dir)
2640 {
2641 struct free_segmap_info *free_i = FREE_I(sbi);
2642 unsigned int segno, secno, zoneno;
2643 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2644 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2645 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2646 unsigned int left_start = hint;
2647 bool init = true;
2648 int go_left = 0;
2649 int i;
2650
2651 spin_lock(&free_i->segmap_lock);
2652
2653 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2654 segno = find_next_zero_bit(free_i->free_segmap,
2655 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2656 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2657 goto got_it;
2658 }
2659 find_other_zone:
2660 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2661 if (secno >= MAIN_SECS(sbi)) {
2662 if (dir == ALLOC_RIGHT) {
2663 secno = find_first_zero_bit(free_i->free_secmap,
2664 MAIN_SECS(sbi));
2665 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2666 } else {
2667 go_left = 1;
2668 left_start = hint - 1;
2669 }
2670 }
2671 if (go_left == 0)
2672 goto skip_left;
2673
2674 while (test_bit(left_start, free_i->free_secmap)) {
2675 if (left_start > 0) {
2676 left_start--;
2677 continue;
2678 }
2679 left_start = find_first_zero_bit(free_i->free_secmap,
2680 MAIN_SECS(sbi));
2681 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2682 break;
2683 }
2684 secno = left_start;
2685 skip_left:
2686 segno = GET_SEG_FROM_SEC(sbi, secno);
2687 zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2688
2689 /* give up on finding another zone */
2690 if (!init)
2691 goto got_it;
2692 if (sbi->secs_per_zone == 1)
2693 goto got_it;
2694 if (zoneno == old_zoneno)
2695 goto got_it;
2696 if (dir == ALLOC_LEFT) {
2697 if (!go_left && zoneno + 1 >= total_zones)
2698 goto got_it;
2699 if (go_left && zoneno == 0)
2700 goto got_it;
2701 }
2702 for (i = 0; i < NR_CURSEG_TYPE; i++)
2703 if (CURSEG_I(sbi, i)->zone == zoneno)
2704 break;
2705
2706 if (i < NR_CURSEG_TYPE) {
2707 /* zone is in use, try another */
2708 if (go_left)
2709 hint = zoneno * sbi->secs_per_zone - 1;
2710 else if (zoneno + 1 >= total_zones)
2711 hint = 0;
2712 else
2713 hint = (zoneno + 1) * sbi->secs_per_zone;
2714 init = false;
2715 goto find_other_zone;
2716 }
2717 got_it:
2718 /* set it as dirty segment in free segmap */
2719 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2720 __set_inuse(sbi, segno);
2721 *newseg = segno;
2722 spin_unlock(&free_i->segmap_lock);
2723 }
2724
2725 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2726 {
2727 struct curseg_info *curseg = CURSEG_I(sbi, type);
2728 struct summary_footer *sum_footer;
2729 unsigned short seg_type = curseg->seg_type;
2730
2731 curseg->inited = true;
2732 curseg->segno = curseg->next_segno;
2733 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2734 curseg->next_blkoff = 0;
2735 curseg->next_segno = NULL_SEGNO;
2736
2737 sum_footer = &(curseg->sum_blk->footer);
2738 memset(sum_footer, 0, sizeof(struct summary_footer));
2739
2740 sanity_check_seg_type(sbi, seg_type);
2741
2742 if (IS_DATASEG(seg_type))
2743 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2744 if (IS_NODESEG(seg_type))
2745 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2746 __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2747 }
2748
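/*
 * Choose the hint segment from which the next free-segment search
 * starts, taking fragmentation test mode, large sections, NOHEAP and
 * the configured allocation mode into account.
 */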
2749 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2750 {
2751 struct curseg_info *curseg = CURSEG_I(sbi, type);
2752 unsigned short seg_type = curseg->seg_type;
2753
2754 sanity_check_seg_type(sbi, seg_type);
2755 if (f2fs_need_rand_seg(sbi))
2756 return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
2757
2758 /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2759 if (__is_large_section(sbi))
2760 return curseg->segno;
2761
2762 /* an in-memory log may not reside on any segment right after mount */
2763 if (!curseg->inited)
2764 return 0;
2765
2766 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2767 return 0;
2768
2769 if (test_opt(sbi, NOHEAP) &&
2770 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2771 return 0;
2772
2773 if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2774 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2775
2776 /* find segments from 0 to reuse freed segments */
2777 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2778 return 0;
2779
2780 return curseg->segno;
2781 }
2782
2783 /*
2784 * Allocate a current working segment.
2785 * This function always allocates a free segment in LFS manner.
2786 */
2787 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2788 {
2789 struct curseg_info *curseg = CURSEG_I(sbi, type);
2790 unsigned short seg_type = curseg->seg_type;
2791 unsigned int segno = curseg->segno;
2792 int dir = ALLOC_LEFT;
2793
2794 if (curseg->inited)
2795 write_sum_page(sbi, curseg->sum_blk,
2796 GET_SUM_BLOCK(sbi, segno));
2797 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2798 dir = ALLOC_RIGHT;
2799
2800 if (test_opt(sbi, NOHEAP))
2801 dir = ALLOC_RIGHT;
2802
2803 segno = __get_next_segno(sbi, type);
2804 get_new_segment(sbi, &segno, new_sec, dir);
2805 curseg->next_segno = segno;
2806 reset_curseg(sbi, type, 1);
2807 curseg->alloc_type = LFS;
2808 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2809 curseg->fragment_remained_chunk =
2810 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2811 }
2812
2813 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2814 int segno, block_t start)
2815 {
2816 struct seg_entry *se = get_seg_entry(sbi, segno);
2817 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2818 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2819 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2820 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2821 int i;
2822
2823 for (i = 0; i < entries; i++)
2824 target_map[i] = ckpt_map[i] | cur_map[i];
2825
2826 return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2827 }
2828
2829 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2830 struct curseg_info *seg)
2831 {
2832 return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2833 }
2834
2835 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2836 {
2837 return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
2838 }
2839
2840 /*
2841 * This function always allocates a used segment (from the dirty seglist) in SSR
2842 * manner, so it must recover the existing summary information of the segment's valid blocks.
2843 */
2844 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2845 {
2846 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2847 struct curseg_info *curseg = CURSEG_I(sbi, type);
2848 unsigned int new_segno = curseg->next_segno;
2849 struct f2fs_summary_block *sum_node;
2850 struct page *sum_page;
2851
2852 write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2853
2854 __set_test_and_inuse(sbi, new_segno);
2855
2856 mutex_lock(&dirty_i->seglist_lock);
2857 __remove_dirty_segment(sbi, new_segno, PRE);
2858 __remove_dirty_segment(sbi, new_segno, DIRTY);
2859 mutex_unlock(&dirty_i->seglist_lock);
2860
2861 reset_curseg(sbi, type, 1);
2862 curseg->alloc_type = SSR;
2863 curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2864
2865 sum_page = f2fs_get_sum_page(sbi, new_segno);
2866 if (IS_ERR(sum_page)) {
2867 /* GC won't be able to use stale summary pages by cp_error */
2868 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2869 return;
2870 }
2871 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2872 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2873 f2fs_put_page(sum_page, 1);
2874 }
2875
2876 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2877 int alloc_mode, unsigned long long age);
2878
2879 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2880 int target_type, int alloc_mode,
2881 unsigned long long age)
2882 {
2883 struct curseg_info *curseg = CURSEG_I(sbi, type);
2884
2885 curseg->seg_type = target_type;
2886
2887 if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2888 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2889
2890 curseg->seg_type = se->type;
2891 change_curseg(sbi, type);
2892 } else {
2893 /* allocate cold segment by default */
2894 curseg->seg_type = CURSEG_COLD_DATA;
2895 new_curseg(sbi, type, true);
2896 }
2897 stat_inc_seg_type(sbi, curseg);
2898 }
2899
2900 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2901 {
2902 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2903
2904 if (!sbi->am.atgc_enabled)
2905 return;
2906
2907 f2fs_down_read(&SM_I(sbi)->curseg_lock);
2908
2909 mutex_lock(&curseg->curseg_mutex);
2910 down_write(&SIT_I(sbi)->sentry_lock);
2911
2912 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2913
2914 up_write(&SIT_I(sbi)->sentry_lock);
2915 mutex_unlock(&curseg->curseg_mutex);
2916
2917 f2fs_up_read(&SM_I(sbi)->curseg_lock);
2918
2919 }
2920 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2921 {
2922 __f2fs_init_atgc_curseg(sbi);
2923 }
2924
2925 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2926 {
2927 struct curseg_info *curseg = CURSEG_I(sbi, type);
2928
2929 mutex_lock(&curseg->curseg_mutex);
2930 if (!curseg->inited)
2931 goto out;
2932
2933 if (get_valid_blocks(sbi, curseg->segno, false)) {
2934 write_sum_page(sbi, curseg->sum_blk,
2935 GET_SUM_BLOCK(sbi, curseg->segno));
2936 } else {
2937 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2938 __set_test_and_free(sbi, curseg->segno, true);
2939 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2940 }
2941 out:
2942 mutex_unlock(&curseg->curseg_mutex);
2943 }
2944
2945 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2946 {
2947 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2948
2949 if (sbi->am.atgc_enabled)
2950 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2951 }
2952
2953 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2954 {
2955 struct curseg_info *curseg = CURSEG_I(sbi, type);
2956
2957 mutex_lock(&curseg->curseg_mutex);
2958 if (!curseg->inited)
2959 goto out;
2960 if (get_valid_blocks(sbi, curseg->segno, false))
2961 goto out;
2962
2963 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2964 __set_test_and_inuse(sbi, curseg->segno);
2965 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2966 out:
2967 mutex_unlock(&curseg->curseg_mutex);
2968 }
2969
2970 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2971 {
2972 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2973
2974 if (sbi->am.atgc_enabled)
2975 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2976 }
2977
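/*
 * Pick a partially valid segment to reuse in SSR manner: first try a
 * victim of the current log type, then the other temperatures, and, if
 * checkpointing is disabled, any completely free dirty segment. Returns
 * 1 and sets curseg->next_segno on success, 0 otherwise.
 */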
2978 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2979 int alloc_mode, unsigned long long age)
2980 {
2981 struct curseg_info *curseg = CURSEG_I(sbi, type);
2982 unsigned segno = NULL_SEGNO;
2983 unsigned short seg_type = curseg->seg_type;
2984 int i, cnt;
2985 bool reversed = false;
2986
2987 sanity_check_seg_type(sbi, seg_type);
2988
2989 /* f2fs_need_SSR() has already forced us to do this */
2990 if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2991 curseg->next_segno = segno;
2992 return 1;
2993 }
2994
2995 /* For node segments, let's do SSR more intensively */
2996 if (IS_NODESEG(seg_type)) {
2997 if (seg_type >= CURSEG_WARM_NODE) {
2998 reversed = true;
2999 i = CURSEG_COLD_NODE;
3000 } else {
3001 i = CURSEG_HOT_NODE;
3002 }
3003 cnt = NR_CURSEG_NODE_TYPE;
3004 } else {
3005 if (seg_type >= CURSEG_WARM_DATA) {
3006 reversed = true;
3007 i = CURSEG_COLD_DATA;
3008 } else {
3009 i = CURSEG_HOT_DATA;
3010 }
3011 cnt = NR_CURSEG_DATA_TYPE;
3012 }
3013
3014 for (; cnt-- > 0; reversed ? i-- : i++) {
3015 if (i == seg_type)
3016 continue;
3017 if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
3018 curseg->next_segno = segno;
3019 return 1;
3020 }
3021 }
3022
3023 /* find valid_blocks=0 in dirty list */
3024 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3025 segno = get_free_segment(sbi);
3026 if (segno != NULL_SEGNO) {
3027 curseg->next_segno = segno;
3028 return 1;
3029 }
3030 }
3031 return 0;
3032 }
3033
3034 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3035 {
3036 struct curseg_info *curseg = CURSEG_I(sbi, type);
3037
3038 if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3039 curseg->seg_type == CURSEG_WARM_NODE)
3040 return true;
3041 if (curseg->alloc_type == LFS &&
3042 is_next_segment_free(sbi, curseg, type) &&
3043 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3044 return true;
3045 if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3046 return true;
3047 return false;
3048 }
3049
3050 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3051 unsigned int start, unsigned int end)
3052 {
3053 struct curseg_info *curseg = CURSEG_I(sbi, type);
3054 unsigned int segno;
3055
3056 f2fs_down_read(&SM_I(sbi)->curseg_lock);
3057 mutex_lock(&curseg->curseg_mutex);
3058 down_write(&SIT_I(sbi)->sentry_lock);
3059
3060 segno = CURSEG_I(sbi, type)->segno;
3061 if (segno < start || segno > end)
3062 goto unlock;
3063
3064 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3065 change_curseg(sbi, type);
3066 else
3067 new_curseg(sbi, type, true);
3068
3069 stat_inc_seg_type(sbi, curseg);
3070
3071 locate_dirty_segment(sbi, segno);
3072 unlock:
3073 up_write(&SIT_I(sbi)->sentry_lock);
3074
3075 if (segno != curseg->segno)
3076 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3077 type, segno, curseg->segno);
3078
3079 mutex_unlock(&curseg->curseg_mutex);
3080 f2fs_up_read(&SM_I(sbi)->curseg_lock);
3081 }
3082
3083 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3084 bool new_sec, bool force)
3085 {
3086 struct curseg_info *curseg = CURSEG_I(sbi, type);
3087 unsigned int old_segno;
3088
3089 if (!force && curseg->inited &&
3090 !curseg->next_blkoff &&
3091 !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3092 !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3093 return;
3094
3095 old_segno = curseg->segno;
3096 new_curseg(sbi, type, true);
3097 stat_inc_seg_type(sbi, curseg);
3098 locate_dirty_segment(sbi, old_segno);
3099 }
3100
3101 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3102 {
3103 f2fs_down_read(&SM_I(sbi)->curseg_lock);
3104 down_write(&SIT_I(sbi)->sentry_lock);
3105 __allocate_new_segment(sbi, type, true, force);
3106 up_write(&SIT_I(sbi)->sentry_lock);
3107 f2fs_up_read(&SM_I(sbi)->curseg_lock);
3108 }
3109
3110 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3111 {
3112 int i;
3113
3114 f2fs_down_read(&SM_I(sbi)->curseg_lock);
3115 down_write(&SIT_I(sbi)->sentry_lock);
3116 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3117 __allocate_new_segment(sbi, i, false, false);
3118 up_write(&SIT_I(sbi)->sentry_lock);
3119 f2fs_up_read(&SM_I(sbi)->curseg_lock);
3120 }
3121
3122 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3123 struct cp_control *cpc)
3124 {
3125 __u64 trim_start = cpc->trim_start;
3126 bool has_candidate = false;
3127
3128 down_write(&SIT_I(sbi)->sentry_lock);
3129 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3130 if (add_discard_addrs(sbi, cpc, true)) {
3131 has_candidate = true;
3132 break;
3133 }
3134 }
3135 up_write(&SIT_I(sbi)->sentry_lock);
3136
3137 cpc->trim_start = trim_start;
3138 return has_candidate;
3139 }
3140
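/*
 * Issue prepared discard commands whose logical range overlaps
 * [start, end] (the FITRIM path), throttled by the policy's
 * max_requests, and return the number of blocks waited on.
 */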
3141 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3142 struct discard_policy *dpolicy,
3143 unsigned int start, unsigned int end)
3144 {
3145 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3146 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3147 struct rb_node **insert_p = NULL, *insert_parent = NULL;
3148 struct discard_cmd *dc;
3149 struct blk_plug plug;
3150 int issued;
3151 unsigned int trimmed = 0;
3152
3153 next:
3154 issued = 0;
3155
3156 mutex_lock(&dcc->cmd_lock);
3157 if (unlikely(dcc->rbtree_check))
3158 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3159
3160 dc = __lookup_discard_cmd_ret(&dcc->root, start,
3161 &prev_dc, &next_dc, &insert_p, &insert_parent);
3162 if (!dc)
3163 dc = next_dc;
3164
3165 blk_start_plug(&plug);
3166
3167 while (dc && dc->di.lstart <= end) {
3168 struct rb_node *node;
3169 int err = 0;
3170
3171 if (dc->di.len < dpolicy->granularity)
3172 goto skip;
3173
3174 if (dc->state != D_PREP) {
3175 list_move_tail(&dc->list, &dcc->fstrim_list);
3176 goto skip;
3177 }
3178
3179 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3180
3181 if (issued >= dpolicy->max_requests) {
3182 start = dc->di.lstart + dc->di.len;
3183
3184 if (err)
3185 __remove_discard_cmd(sbi, dc);
3186
3187 blk_finish_plug(&plug);
3188 mutex_unlock(&dcc->cmd_lock);
3189 trimmed += __wait_all_discard_cmd(sbi, NULL);
3190 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3191 goto next;
3192 }
3193 skip:
3194 node = rb_next(&dc->rb_node);
3195 if (err)
3196 __remove_discard_cmd(sbi, dc);
3197 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3198
3199 if (fatal_signal_pending(current))
3200 break;
3201 }
3202
3203 blk_finish_plug(&plug);
3204 mutex_unlock(&dcc->cmd_lock);
3205
3206 return trimmed;
3207 }
3208
3209 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3210 {
3211 __u64 start = F2FS_BYTES_TO_BLK(range->start);
3212 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3213 unsigned int start_segno, end_segno;
3214 block_t start_block, end_block;
3215 struct cp_control cpc;
3216 struct discard_policy dpolicy;
3217 unsigned long long trimmed = 0;
3218 int err = 0;
3219 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3220
3221 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3222 return -EINVAL;
3223
3224 if (end < MAIN_BLKADDR(sbi))
3225 goto out;
3226
3227 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3228 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3229 return -EFSCORRUPTED;
3230 }
3231
3232 /* start/end segment number in main_area */
3233 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3234 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3235 GET_SEGNO(sbi, end);
3236 if (need_align) {
3237 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3238 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3239 }
3240
3241 cpc.reason = CP_DISCARD;
3242 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3243 cpc.trim_start = start_segno;
3244 cpc.trim_end = end_segno;
3245
3246 if (sbi->discard_blks == 0)
3247 goto out;
3248
3249 f2fs_down_write(&sbi->gc_lock);
3250 stat_inc_cp_call_count(sbi, TOTAL_CALL);
3251 err = f2fs_write_checkpoint(sbi, &cpc);
3252 f2fs_up_write(&sbi->gc_lock);
3253 if (err)
3254 goto out;
3255
3256 /*
3257 * We queued discard candidates, but we don't actually need to wait for
3258 * all of them, since they will be issued during idle time by the runtime
3259 * discard thread. Users are expected to rely on either runtime discard
3260 * or periodic fstrim, not both at once.
3261 */
3262 if (f2fs_realtime_discard_enable(sbi))
3263 goto out;
3264
3265 start_block = START_BLOCK(sbi, start_segno);
3266 end_block = START_BLOCK(sbi, end_segno + 1);
3267
3268 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3269 trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3270 start_block, end_block);
3271
3272 trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3273 start_block, end_block);
3274 out:
3275 if (!err)
3276 range->len = F2FS_BLK_TO_BYTES(trimmed);
3277 return err;
3278 }
3279
3280 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3281 {
3282 switch (hint) {
3283 case WRITE_LIFE_SHORT:
3284 return CURSEG_HOT_DATA;
3285 case WRITE_LIFE_EXTREME:
3286 return CURSEG_COLD_DATA;
3287 default:
3288 return CURSEG_WARM_DATA;
3289 }
3290 }
3291
3292 static int __get_segment_type_2(struct f2fs_io_info *fio)
3293 {
3294 if (fio->type == DATA)
3295 return CURSEG_HOT_DATA;
3296 else
3297 return CURSEG_HOT_NODE;
3298 }
3299
3300 static int __get_segment_type_4(struct f2fs_io_info *fio)
3301 {
3302 if (fio->type == DATA) {
3303 struct inode *inode = fio->page->mapping->host;
3304
3305 if (S_ISDIR(inode->i_mode))
3306 return CURSEG_HOT_DATA;
3307 else
3308 return CURSEG_COLD_DATA;
3309 } else {
3310 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3311 return CURSEG_WARM_NODE;
3312 else
3313 return CURSEG_COLD_NODE;
3314 }
3315 }
3316
3317 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3318 {
3319 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3320 struct extent_info ei = {};
3321
3322 if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3323 if (!ei.age)
3324 return NO_CHECK_TYPE;
3325 if (ei.age <= sbi->hot_data_age_threshold)
3326 return CURSEG_HOT_DATA;
3327 if (ei.age <= sbi->warm_data_age_threshold)
3328 return CURSEG_WARM_DATA;
3329 return CURSEG_COLD_DATA;
3330 }
3331 return NO_CHECK_TYPE;
3332 }
3333
3334 static int __get_segment_type_6(struct f2fs_io_info *fio)
3335 {
3336 if (fio->type == DATA) {
3337 struct inode *inode = fio->page->mapping->host;
3338 int type;
3339
3340 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3341 return CURSEG_COLD_DATA_PINNED;
3342
3343 if (page_private_gcing(fio->page)) {
3344 if (fio->sbi->am.atgc_enabled &&
3345 (fio->io_type == FS_DATA_IO) &&
3346 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3347 return CURSEG_ALL_DATA_ATGC;
3348 else
3349 return CURSEG_COLD_DATA;
3350 }
3351 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3352 return CURSEG_COLD_DATA;
3353
3354 type = __get_age_segment_type(inode, fio->page->index);
3355 if (type != NO_CHECK_TYPE)
3356 return type;
3357
3358 if (file_is_hot(inode) ||
3359 is_inode_flag_set(inode, FI_HOT_DATA) ||
3360 f2fs_is_cow_file(inode))
3361 return CURSEG_HOT_DATA;
3362 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3363 } else {
3364 if (IS_DNODE(fio->page))
3365 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3366 CURSEG_HOT_NODE;
3367 return CURSEG_COLD_NODE;
3368 }
3369 }
3370
3371 static int __get_segment_type(struct f2fs_io_info *fio)
3372 {
3373 int type = 0;
3374
3375 switch (F2FS_OPTION(fio->sbi).active_logs) {
3376 case 2:
3377 type = __get_segment_type_2(fio);
3378 break;
3379 case 4:
3380 type = __get_segment_type_4(fio);
3381 break;
3382 case 6:
3383 type = __get_segment_type_6(fio);
3384 break;
3385 default:
3386 f2fs_bug_on(fio->sbi, true);
3387 }
3388
3389 if (IS_HOT(type))
3390 fio->temp = HOT;
3391 else if (IS_WARM(type))
3392 fio->temp = WARM;
3393 else
3394 fio->temp = COLD;
3395 return type;
3396 }
3397
3398 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3399 struct curseg_info *seg)
3400 {
3401 /* To allocate block chunks of different sizes, use a random number */
3402 if (--seg->fragment_remained_chunk > 0)
3403 return;
3404
3405 seg->fragment_remained_chunk =
3406 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3407 seg->next_blkoff +=
3408 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3409 }
3410
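/*
 * Core block allocator: reserve the next block of the current segment
 * of @type, record the summary entry, update SIT and mtime information,
 * and open a new segment (LFS or SSR) once the current one is full.
 */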
3411 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3412 block_t old_blkaddr, block_t *new_blkaddr,
3413 struct f2fs_summary *sum, int type,
3414 struct f2fs_io_info *fio)
3415 {
3416 struct sit_info *sit_i = SIT_I(sbi);
3417 struct curseg_info *curseg = CURSEG_I(sbi, type);
3418 unsigned long long old_mtime;
3419 bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3420 struct seg_entry *se = NULL;
3421 bool segment_full = false;
3422
3423 f2fs_down_read(&SM_I(sbi)->curseg_lock);
3424
3425 mutex_lock(&curseg->curseg_mutex);
3426 down_write(&sit_i->sentry_lock);
3427
3428 if (from_gc) {
3429 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3430 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3431 sanity_check_seg_type(sbi, se->type);
3432 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3433 }
3434 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3435
3436 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3437
3438 f2fs_wait_discard_bio(sbi, *new_blkaddr);
3439
3440 curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3441 if (curseg->alloc_type == SSR) {
3442 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3443 } else {
3444 curseg->next_blkoff++;
3445 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3446 f2fs_randomize_chunk(sbi, curseg);
3447 }
3448 if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3449 segment_full = true;
3450 stat_inc_block_count(sbi, curseg);
3451
3452 if (from_gc) {
3453 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3454 } else {
3455 update_segment_mtime(sbi, old_blkaddr, 0);
3456 old_mtime = 0;
3457 }
3458 update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3459
3460 /*
3461 * SIT information should be updated before segment allocation,
3462 * since SSR needs the latest valid block information.
3463 */
3464 update_sit_entry(sbi, *new_blkaddr, 1);
3465 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3466 update_sit_entry(sbi, old_blkaddr, -1);
3467
3468 /*
3469 * If the current segment is full, flush it out and replace it with a
3470 * new segment.
3471 */
3472 if (segment_full) {
3473 if (from_gc) {
3474 get_atssr_segment(sbi, type, se->type,
3475 AT_SSR, se->mtime);
3476 } else {
3477 if (need_new_seg(sbi, type))
3478 new_curseg(sbi, type, false);
3479 else
3480 change_curseg(sbi, type);
3481 stat_inc_seg_type(sbi, curseg);
3482 }
3483 }
3484 /*
3485 * segment dirty status should be updated after segment allocation,
3486 * so we only need to update the status once, after the previous
3487 * segment has been closed.
3488 */
3489 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3490 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3491
3492 if (IS_DATASEG(curseg->seg_type))
3493 atomic64_inc(&sbi->allocated_data_blocks);
3494
3495 up_write(&sit_i->sentry_lock);
3496
3497 if (page && IS_NODESEG(curseg->seg_type)) {
3498 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3499
3500 f2fs_inode_chksum_set(sbi, page);
3501 }
3502
3503 if (fio) {
3504 struct f2fs_bio_info *io;
3505
3506 if (F2FS_IO_ALIGNED(sbi))
3507 fio->retry = 0;
3508
3509 INIT_LIST_HEAD(&fio->list);
3510 fio->in_list = 1;
3511 io = sbi->write_io[fio->type] + fio->temp;
3512 spin_lock(&io->io_lock);
3513 list_add_tail(&fio->list, &io->io_list);
3514 spin_unlock(&io->io_lock);
3515 }
3516
3517 mutex_unlock(&curseg->curseg_mutex);
3518
3519 f2fs_up_read(&SM_I(sbi)->curseg_lock);
3520 }
3521
3522 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3523 block_t blkaddr, unsigned int blkcnt)
3524 {
3525 if (!f2fs_is_multi_device(sbi))
3526 return;
3527
3528 while (1) {
3529 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3530 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3531
3532 /* update device state for fsync */
3533 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3534
3535 /* update device state for checkpoint */
3536 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3537 spin_lock(&sbi->dev_lock);
3538 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3539 spin_unlock(&sbi->dev_lock);
3540 }
3541
3542 if (blkcnt <= blks)
3543 break;
3544 blkcnt -= blks;
3545 blkaddr += blks;
3546 }
3547 }
3548
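/*
 * Allocate a new block according to the page's temperature and submit
 * the write; in LFS mode, cold data writes are serialized by
 * io_order_lock to keep their on-disk order.
 */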
3549 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3550 {
3551 int type = __get_segment_type(fio);
3552 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3553
3554 if (keep_order)
3555 f2fs_down_read(&fio->sbi->io_order_lock);
3556 reallocate:
3557 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3558 &fio->new_blkaddr, sum, type, fio);
3559 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3560 f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3561
3562 /* write out the dirty page to the bdev */
3563 f2fs_submit_page_write(fio);
3564 if (fio->retry) {
3565 fio->old_blkaddr = fio->new_blkaddr;
3566 goto reallocate;
3567 }
3568
3569 f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3570
3571 if (keep_order)
3572 f2fs_up_read(&fio->sbi->io_order_lock);
3573 }
3574
3575 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3576 enum iostat_type io_type)
3577 {
3578 struct f2fs_io_info fio = {
3579 .sbi = sbi,
3580 .type = META,
3581 .temp = HOT,
3582 .op = REQ_OP_WRITE,
3583 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3584 .old_blkaddr = page->index,
3585 .new_blkaddr = page->index,
3586 .page = page,
3587 .encrypted_page = NULL,
3588 .in_list = 0,
3589 };
3590
3591 if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3592 fio.op_flags &= ~REQ_META;
3593
3594 set_page_writeback(page);
3595 f2fs_submit_page_write(&fio);
3596
3597 stat_inc_meta_count(sbi, page->index);
3598 f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3599 }
3600
3601 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3602 {
3603 struct f2fs_summary sum;
3604
3605 set_summary(&sum, nid, 0, 0);
3606 do_write_page(&sum, fio);
3607
3608 f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3609 }
3610
3611 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3612 struct f2fs_io_info *fio)
3613 {
3614 struct f2fs_sb_info *sbi = fio->sbi;
3615 struct f2fs_summary sum;
3616
3617 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3618 if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3619 f2fs_update_age_extent_cache(dn);
3620 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3621 do_write_page(&sum, fio);
3622 f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3623
3624 f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3625 }
3626
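/*
 * Rewrite a page at its existing block address (in-place update). The
 * target segment must be a data segment; on corruption or checkpoint
 * error the attached bio is failed and an error is returned.
 */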
3627 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3628 {
3629 int err;
3630 struct f2fs_sb_info *sbi = fio->sbi;
3631 unsigned int segno;
3632
3633 fio->new_blkaddr = fio->old_blkaddr;
3634 /* i/o temperature is needed for passing down write hints */
3635 __get_segment_type(fio);
3636
3637 segno = GET_SEGNO(sbi, fio->new_blkaddr);
3638
3639 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3640 set_sbi_flag(sbi, SBI_NEED_FSCK);
3641 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3642 __func__, segno);
3643 err = -EFSCORRUPTED;
3644 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3645 goto drop_bio;
3646 }
3647
3648 if (f2fs_cp_error(sbi)) {
3649 err = -EIO;
3650 goto drop_bio;
3651 }
3652
3653 if (fio->post_read)
3654 f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3655
3656 stat_inc_inplace_blocks(fio->sbi);
3657
3658 if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3659 err = f2fs_merge_page_bio(fio);
3660 else
3661 err = f2fs_submit_page_bio(fio);
3662 if (!err) {
3663 f2fs_update_device_state(fio->sbi, fio->ino,
3664 fio->new_blkaddr, 1);
3665 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3666 fio->io_type, F2FS_BLKSIZE);
3667 }
3668
3669 return err;
3670 drop_bio:
3671 if (fio->bio && *(fio->bio)) {
3672 struct bio *bio = *(fio->bio);
3673
3674 bio->bi_status = BLK_STS_IOERR;
3675 bio_endio(bio);
3676 *(fio->bio) = NULL;
3677 }
3678 return err;
3679 }
3680
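/*
 * Return the active log type whose current segment is @segno, or
 * NO_CHECK_TYPE if no current segment matches.
 */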
3681 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3682 unsigned int segno)
3683 {
3684 int i;
3685
3686 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3687 if (CURSEG_I(sbi, i)->segno == segno)
3688 break;
3689 }
3690 return i;
3691 }
3692
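/*
 * Rewrite a summary entry at a specific block address: temporarily point the
 * chosen data curseg at the segment owning @new_blkaddr, record @sum there,
 * update SIT accounting for the old and new addresses as needed, and restore
 * the original curseg position when @recover_curseg is set.
 */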
3693 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3694 block_t old_blkaddr, block_t new_blkaddr,
3695 bool recover_curseg, bool recover_newaddr,
3696 bool from_gc)
3697 {
3698 struct sit_info *sit_i = SIT_I(sbi);
3699 struct curseg_info *curseg;
3700 unsigned int segno, old_cursegno;
3701 struct seg_entry *se;
3702 int type;
3703 unsigned short old_blkoff;
3704 unsigned char old_alloc_type;
3705
3706 segno = GET_SEGNO(sbi, new_blkaddr);
3707 se = get_seg_entry(sbi, segno);
3708 type = se->type;
3709
3710 f2fs_down_write(&SM_I(sbi)->curseg_lock);
3711
3712 if (!recover_curseg) {
3713 /* for recovery flow */
3714 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3715 if (old_blkaddr == NULL_ADDR)
3716 type = CURSEG_COLD_DATA;
3717 else
3718 type = CURSEG_WARM_DATA;
3719 }
3720 } else {
3721 if (IS_CURSEG(sbi, segno)) {
3722 /* se->type is volatile due to SSR allocation */
3723 type = __f2fs_get_curseg(sbi, segno);
3724 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3725 } else {
3726 type = CURSEG_WARM_DATA;
3727 }
3728 }
3729
3730 f2fs_bug_on(sbi, !IS_DATASEG(type));
3731 curseg = CURSEG_I(sbi, type);
3732
3733 mutex_lock(&curseg->curseg_mutex);
3734 down_write(&sit_i->sentry_lock);
3735
3736 old_cursegno = curseg->segno;
3737 old_blkoff = curseg->next_blkoff;
3738 old_alloc_type = curseg->alloc_type;
3739
3740 /* change the current segment */
3741 if (segno != curseg->segno) {
3742 curseg->next_segno = segno;
3743 change_curseg(sbi, type);
3744 }
3745
3746 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3747 curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3748
3749 if (!recover_curseg || recover_newaddr) {
3750 if (!from_gc)
3751 update_segment_mtime(sbi, new_blkaddr, 0);
3752 update_sit_entry(sbi, new_blkaddr, 1);
3753 }
3754 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3755 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
3756 if (!from_gc)
3757 update_segment_mtime(sbi, old_blkaddr, 0);
3758 update_sit_entry(sbi, old_blkaddr, -1);
3759 }
3760
3761 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3762 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3763
3764 locate_dirty_segment(sbi, old_cursegno);
3765
3766 if (recover_curseg) {
3767 if (old_cursegno != curseg->segno) {
3768 curseg->next_segno = old_cursegno;
3769 change_curseg(sbi, type);
3770 }
3771 curseg->next_blkoff = old_blkoff;
3772 curseg->alloc_type = old_alloc_type;
3773 }
3774
3775 up_write(&sit_i->sentry_lock);
3776 mutex_unlock(&curseg->curseg_mutex);
3777 f2fs_up_write(&SM_I(sbi)->curseg_lock);
3778 }
3779
3780 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3781 block_t old_addr, block_t new_addr,
3782 unsigned char version, bool recover_curseg,
3783 bool recover_newaddr)
3784 {
3785 struct f2fs_summary sum;
3786
3787 set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3788
3789 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3790 recover_curseg, recover_newaddr, false);
3791
3792 f2fs_update_data_blkaddr(dn, new_addr);
3793 }
3794
3795 void f2fs_wait_on_page_writeback(struct page *page,
3796 enum page_type type, bool ordered, bool locked)
3797 {
3798 if (PageWriteback(page)) {
3799 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3800
3801 /* submit cached LFS IO */
3802 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3803 /* submit cached IPU IO */
3804 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3805 if (ordered) {
3806 wait_on_page_writeback(page);
3807 f2fs_bug_on(sbi, locked && PageWriteback(page));
3808 } else {
3809 wait_for_stable_page(page);
3810 }
3811 }
3812 }
3813
3814 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3815 {
3816 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3817 struct page *cpage;
3818
3819 if (!f2fs_post_read_required(inode))
3820 return;
3821
3822 if (!__is_valid_data_blkaddr(blkaddr))
3823 return;
3824
3825 cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3826 if (cpage) {
3827 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3828 f2fs_put_page(cpage, 1);
3829 }
3830 }
3831
3832 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3833 block_t len)
3834 {
3835 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3836 block_t i;
3837
3838 if (!f2fs_post_read_required(inode))
3839 return;
3840
3841 for (i = 0; i < len; i++)
3842 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3843
3844 f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
3845 }
3846
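/*
 * Restore the NAT/SIT journals and the three data cursegs' summary entries
 * from the compacted summary area written at checkpoint time, starting at
 * start_sum_block().
 */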
3847 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3848 {
3849 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3850 struct curseg_info *seg_i;
3851 unsigned char *kaddr;
3852 struct page *page;
3853 block_t start;
3854 int i, j, offset;
3855
3856 start = start_sum_block(sbi);
3857
3858 page = f2fs_get_meta_page(sbi, start++);
3859 if (IS_ERR(page))
3860 return PTR_ERR(page);
3861 kaddr = (unsigned char *)page_address(page);
3862
3863 /* Step 1: restore nat cache */
3864 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3865 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3866
3867 /* Step 2: restore sit cache */
3868 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3869 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3870 offset = 2 * SUM_JOURNAL_SIZE;
3871
3872 /* Step 3: restore summary entries */
3873 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3874 unsigned short blk_off;
3875 unsigned int segno;
3876
3877 seg_i = CURSEG_I(sbi, i);
3878 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3879 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3880 seg_i->next_segno = segno;
3881 reset_curseg(sbi, i, 0);
3882 seg_i->alloc_type = ckpt->alloc_type[i];
3883 seg_i->next_blkoff = blk_off;
3884
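/*
 * For an SSR log, a full segment of summary entries was written at
 * checkpoint (see f2fs_curseg_valid_blocks()), so consume all of them
 * to keep the offset into the compacted area aligned.
 */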
3885 if (seg_i->alloc_type == SSR)
3886 blk_off = sbi->blocks_per_seg;
3887
3888 for (j = 0; j < blk_off; j++) {
3889 struct f2fs_summary *s;
3890
3891 s = (struct f2fs_summary *)(kaddr + offset);
3892 seg_i->sum_blk->entries[j] = *s;
3893 offset += SUMMARY_SIZE;
3894 if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3895 SUM_FOOTER_SIZE)
3896 continue;
3897
3898 f2fs_put_page(page, 1);
3899 page = NULL;
3900
3901 page = f2fs_get_meta_page(sbi, start++);
3902 if (IS_ERR(page))
3903 return PTR_ERR(page);
3904 kaddr = (unsigned char *)page_address(page);
3905 offset = 0;
3906 }
3907 }
3908 f2fs_put_page(page, 1);
3909 return 0;
3910 }
3911
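/*
 * Read the on-disk summary block for curseg @type (from the checkpoint pack
 * or from the segment's SSA block), restore node summaries if needed, and
 * reload the in-memory curseg journal, entries, footer and allocation info.
 */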
3912 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3913 {
3914 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3915 struct f2fs_summary_block *sum;
3916 struct curseg_info *curseg;
3917 struct page *new;
3918 unsigned short blk_off;
3919 unsigned int segno = 0;
3920 block_t blk_addr = 0;
3921 int err = 0;
3922
3923 /* get segment number and block addr */
3924 if (IS_DATASEG(type)) {
3925 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3926 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3927 CURSEG_HOT_DATA]);
3928 if (__exist_node_summaries(sbi))
3929 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3930 else
3931 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3932 } else {
3933 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3934 CURSEG_HOT_NODE]);
3935 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3936 CURSEG_HOT_NODE]);
3937 if (__exist_node_summaries(sbi))
3938 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3939 type - CURSEG_HOT_NODE);
3940 else
3941 blk_addr = GET_SUM_BLOCK(sbi, segno);
3942 }
3943
3944 new = f2fs_get_meta_page(sbi, blk_addr);
3945 if (IS_ERR(new))
3946 return PTR_ERR(new);
3947 sum = (struct f2fs_summary_block *)page_address(new);
3948
3949 if (IS_NODESEG(type)) {
3950 if (__exist_node_summaries(sbi)) {
3951 struct f2fs_summary *ns = &sum->entries[0];
3952 int i;
3953
3954 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3955 ns->version = 0;
3956 ns->ofs_in_node = 0;
3957 }
3958 } else {
3959 err = f2fs_restore_node_summary(sbi, segno, sum);
3960 if (err)
3961 goto out;
3962 }
3963 }
3964
3965 /* set the uncompleted segment as curseg */
3966 curseg = CURSEG_I(sbi, type);
3967 mutex_lock(&curseg->curseg_mutex);
3968
3969 /* update journal info */
3970 down_write(&curseg->journal_rwsem);
3971 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3972 up_write(&curseg->journal_rwsem);
3973
3974 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3975 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3976 curseg->next_segno = segno;
3977 reset_curseg(sbi, type, 0);
3978 curseg->alloc_type = ckpt->alloc_type[type];
3979 curseg->next_blkoff = blk_off;
3980 mutex_unlock(&curseg->curseg_mutex);
3981 out:
3982 f2fs_put_page(new, 1);
3983 return err;
3984 }
3985
3986 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3987 {
3988 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3989 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3990 int type = CURSEG_HOT_DATA;
3991 int err;
3992
3993 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3994 int npages = f2fs_npages_for_summary_flush(sbi, true);
3995
3996 if (npages >= 2)
3997 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3998 META_CP, true);
3999
4000 /* restore for compacted data summary */
4001 err = read_compacted_summaries(sbi);
4002 if (err)
4003 return err;
4004 type = CURSEG_HOT_NODE;
4005 }
4006
4007 if (__exist_node_summaries(sbi))
4008 f2fs_ra_meta_pages(sbi,
4009 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4010 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4011
4012 for (; type <= CURSEG_COLD_NODE; type++) {
4013 err = read_normal_summaries(sbi, type);
4014 if (err)
4015 return err;
4016 }
4017
4018 /* sanity check for summary blocks */
4019 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4020 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4021 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4022 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4023 return -EINVAL;
4024 }
4025
4026 return 0;
4027 }
4028
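/*
 * Pack the NAT/SIT journals and the data cursegs' summary entries into
 * consecutive meta pages starting at @blkaddr.
 */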
4029 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4030 {
4031 struct page *page;
4032 unsigned char *kaddr;
4033 struct f2fs_summary *summary;
4034 struct curseg_info *seg_i;
4035 int written_size = 0;
4036 int i, j;
4037
4038 page = f2fs_grab_meta_page(sbi, blkaddr++);
4039 kaddr = (unsigned char *)page_address(page);
4040 memset(kaddr, 0, PAGE_SIZE);
4041
4042 /* Step 1: write nat cache */
4043 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4044 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4045 written_size += SUM_JOURNAL_SIZE;
4046
4047 /* Step 2: write sit cache */
4048 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4049 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4050 written_size += SUM_JOURNAL_SIZE;
4051
4052 /* Step 3: write summary entries */
4053 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4054 seg_i = CURSEG_I(sbi, i);
4055 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4056 if (!page) {
4057 page = f2fs_grab_meta_page(sbi, blkaddr++);
4058 kaddr = (unsigned char *)page_address(page);
4059 memset(kaddr, 0, PAGE_SIZE);
4060 written_size = 0;
4061 }
4062 summary = (struct f2fs_summary *)(kaddr + written_size);
4063 *summary = seg_i->sum_blk->entries[j];
4064 written_size += SUMMARY_SIZE;
4065
4066 if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4067 SUM_FOOTER_SIZE)
4068 continue;
4069
4070 set_page_dirty(page);
4071 f2fs_put_page(page, 1);
4072 page = NULL;
4073 }
4074 }
4075 if (page) {
4076 set_page_dirty(page);
4077 f2fs_put_page(page, 1);
4078 }
4079 }
4080
4081 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4082 block_t blkaddr, int type)
4083 {
4084 int i, end;
4085
4086 if (IS_DATASEG(type))
4087 end = type + NR_CURSEG_DATA_TYPE;
4088 else
4089 end = type + NR_CURSEG_NODE_TYPE;
4090
4091 for (i = type; i < end; i++)
4092 write_current_sum_page(sbi, i, blkaddr + (i - type));
4093 }
4094
4095 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4096 {
4097 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4098 write_compacted_summaries(sbi, start_blk);
4099 else
4100 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4101 }
4102
4103 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4104 {
4105 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4106 }
4107
4108 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4109 unsigned int val, int alloc)
4110 {
4111 int i;
4112
4113 if (type == NAT_JOURNAL) {
4114 for (i = 0; i < nats_in_cursum(journal); i++) {
4115 if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4116 return i;
4117 }
4118 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4119 return update_nats_in_cursum(journal, 1);
4120 } else if (type == SIT_JOURNAL) {
4121 for (i = 0; i < sits_in_cursum(journal); i++)
4122 if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4123 return i;
4124 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4125 return update_sits_in_cursum(journal, 1);
4126 }
4127 return -1;
4128 }
4129
4130 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4131 unsigned int segno)
4132 {
4133 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4134 }
4135
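/*
 * Copy the up-to-date in-memory SIT info for the SIT block containing @start
 * into the alternate on-disk SIT block location, mark the page dirty and flip
 * the SIT bitmap bit so the next lookup reads the new copy.
 */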
4136 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4137 unsigned int start)
4138 {
4139 struct sit_info *sit_i = SIT_I(sbi);
4140 struct page *page;
4141 pgoff_t src_off, dst_off;
4142
4143 src_off = current_sit_addr(sbi, start);
4144 dst_off = next_sit_addr(sbi, src_off);
4145
4146 page = f2fs_grab_meta_page(sbi, dst_off);
4147 seg_info_to_sit_page(sbi, page, start);
4148
4149 set_page_dirty(page);
4150 set_to_next_sit(sit_i, start);
4151
4152 return page;
4153 }
4154
4155 static struct sit_entry_set *grab_sit_entry_set(void)
4156 {
4157 struct sit_entry_set *ses =
4158 f2fs_kmem_cache_alloc(sit_entry_set_slab,
4159 GFP_NOFS, true, NULL);
4160
4161 ses->entry_cnt = 0;
4162 INIT_LIST_HEAD(&ses->set_list);
4163 return ses;
4164 }
4165
4166 static void release_sit_entry_set(struct sit_entry_set *ses)
4167 {
4168 list_del(&ses->set_list);
4169 kmem_cache_free(sit_entry_set_slab, ses);
4170 }
4171
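/* keep the set list sorted by ascending entry_cnt after @ses has grown */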
4172 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4173 struct list_head *head)
4174 {
4175 struct sit_entry_set *next = ses;
4176
4177 if (list_is_last(&ses->set_list, head))
4178 return;
4179
4180 list_for_each_entry_continue(next, head, set_list)
4181 if (ses->entry_cnt <= next->entry_cnt) {
4182 list_move_tail(&ses->set_list, &next->set_list);
4183 return;
4184 }
4185
4186 list_move_tail(&ses->set_list, head);
4187 }
4188
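/*
 * Account one dirty sit entry against the set covering its SIT block,
 * allocating a new set if none exists yet.
 */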
4189 static void add_sit_entry(unsigned int segno, struct list_head *head)
4190 {
4191 struct sit_entry_set *ses;
4192 unsigned int start_segno = START_SEGNO(segno);
4193
4194 list_for_each_entry(ses, head, set_list) {
4195 if (ses->start_segno == start_segno) {
4196 ses->entry_cnt++;
4197 adjust_sit_entry_set(ses, head);
4198 return;
4199 }
4200 }
4201
4202 ses = grab_sit_entry_set();
4203
4204 ses->start_segno = start_segno;
4205 ses->entry_cnt++;
4206 list_add(&ses->set_list, head);
4207 }
4208
4209 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4210 {
4211 struct f2fs_sm_info *sm_info = SM_I(sbi);
4212 struct list_head *set_list = &sm_info->sit_entry_set;
4213 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4214 unsigned int segno;
4215
4216 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4217 add_sit_entry(segno, set_list);
4218 }
4219
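/*
 * Move all entries out of the SIT journal: mark each journaled segment dirty
 * (adding it to the entry sets if it was not already) and empty the journal.
 */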
4220 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4221 {
4222 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4223 struct f2fs_journal *journal = curseg->journal;
4224 int i;
4225
4226 down_write(&curseg->journal_rwsem);
4227 for (i = 0; i < sits_in_cursum(journal); i++) {
4228 unsigned int segno;
4229 bool dirtied;
4230
4231 segno = le32_to_cpu(segno_in_journal(journal, i));
4232 dirtied = __mark_sit_entry_dirty(sbi, segno);
4233
4234 if (!dirtied)
4235 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4236 }
4237 update_sits_in_cursum(journal, -i);
4238 up_write(&curseg->journal_rwsem);
4239 }
4240
4241 /*
4242 * CP calls this function, which flushes SIT entries including sit_journal,
4243 * and moves prefree segs to free segs.
4244 */
4245 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4246 {
4247 struct sit_info *sit_i = SIT_I(sbi);
4248 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4249 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4250 struct f2fs_journal *journal = curseg->journal;
4251 struct sit_entry_set *ses, *tmp;
4252 struct list_head *head = &SM_I(sbi)->sit_entry_set;
4253 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4254 struct seg_entry *se;
4255
4256 down_write(&sit_i->sentry_lock);
4257
4258 if (!sit_i->dirty_sentries)
4259 goto out;
4260
4261 /*
4262 * temporarily add and account the sit entries of the dirty bitmap
4263 * in the sit entry set
4264 */
4265 add_sits_in_set(sbi);
4266
4267 /*
4268 * if there is not enough space in the journal to store dirty sit
4269 * entries, remove all entries from the journal and add and account
4270 * them in the sit entry set.
4271 */
4272 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4273 !to_journal)
4274 remove_sits_in_journal(sbi);
4275
4276 /*
4277 * there are two steps to flush sit entries:
4278 * #1, flush sit entries to journal in current cold data summary block.
4279 * #2, flush sit entries to sit page.
4280 */
4281 list_for_each_entry_safe(ses, tmp, head, set_list) {
4282 struct page *page = NULL;
4283 struct f2fs_sit_block *raw_sit = NULL;
4284 unsigned int start_segno = ses->start_segno;
4285 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4286 (unsigned long)MAIN_SEGS(sbi));
4287 unsigned int segno = start_segno;
4288
4289 if (to_journal &&
4290 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4291 to_journal = false;
4292
4293 if (to_journal) {
4294 down_write(&curseg->journal_rwsem);
4295 } else {
4296 page = get_next_sit_page(sbi, start_segno);
4297 raw_sit = page_address(page);
4298 }
4299
4300 /* flush dirty sit entries in region of current sit set */
4301 for_each_set_bit_from(segno, bitmap, end) {
4302 int offset, sit_offset;
4303
4304 se = get_seg_entry(sbi, segno);
4305 #ifdef CONFIG_F2FS_CHECK_FS
4306 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4307 SIT_VBLOCK_MAP_SIZE))
4308 f2fs_bug_on(sbi, 1);
4309 #endif
4310
4311 /* add discard candidates */
4312 if (!(cpc->reason & CP_DISCARD)) {
4313 cpc->trim_start = segno;
4314 add_discard_addrs(sbi, cpc, false);
4315 }
4316
4317 if (to_journal) {
4318 offset = f2fs_lookup_journal_in_cursum(journal,
4319 SIT_JOURNAL, segno, 1);
4320 f2fs_bug_on(sbi, offset < 0);
4321 segno_in_journal(journal, offset) =
4322 cpu_to_le32(segno);
4323 seg_info_to_raw_sit(se,
4324 &sit_in_journal(journal, offset));
4325 check_block_count(sbi, segno,
4326 &sit_in_journal(journal, offset));
4327 } else {
4328 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4329 seg_info_to_raw_sit(se,
4330 &raw_sit->entries[sit_offset]);
4331 check_block_count(sbi, segno,
4332 &raw_sit->entries[sit_offset]);
4333 }
4334
4335 __clear_bit(segno, bitmap);
4336 sit_i->dirty_sentries--;
4337 ses->entry_cnt--;
4338 }
4339
4340 if (to_journal)
4341 up_write(&curseg->journal_rwsem);
4342 else
4343 f2fs_put_page(page, 1);
4344
4345 f2fs_bug_on(sbi, ses->entry_cnt);
4346 release_sit_entry_set(ses);
4347 }
4348
4349 f2fs_bug_on(sbi, !list_empty(head));
4350 f2fs_bug_on(sbi, sit_i->dirty_sentries);
4351 out:
4352 if (cpc->reason & CP_DISCARD) {
4353 __u64 trim_start = cpc->trim_start;
4354
4355 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4356 add_discard_addrs(sbi, cpc, false);
4357
4358 cpc->trim_start = trim_start;
4359 }
4360 up_write(&sit_i->sentry_lock);
4361
4362 set_prefree_as_free_segments(sbi);
4363 }
4364
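/*
 * Allocate the in-memory SIT structures: the seg_entry array with its
 * valid/ckpt/discard bitmaps, the dirty-sentries bitmap, per-section entries
 * for large sections, and a copy of the checkpoint's SIT bitmap.
 */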
4365 static int build_sit_info(struct f2fs_sb_info *sbi)
4366 {
4367 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4368 struct sit_info *sit_i;
4369 unsigned int sit_segs, start;
4370 char *src_bitmap, *bitmap;
4371 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4372 unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4373
4374 /* allocate memory for SIT information */
4375 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4376 if (!sit_i)
4377 return -ENOMEM;
4378
4379 SM_I(sbi)->sit_info = sit_i;
4380
4381 sit_i->sentries =
4382 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4383 MAIN_SEGS(sbi)),
4384 GFP_KERNEL);
4385 if (!sit_i->sentries)
4386 return -ENOMEM;
4387
4388 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4389 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4390 GFP_KERNEL);
4391 if (!sit_i->dirty_sentries_bitmap)
4392 return -ENOMEM;
4393
4394 #ifdef CONFIG_F2FS_CHECK_FS
4395 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4396 #else
4397 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4398 #endif
4399 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4400 if (!sit_i->bitmap)
4401 return -ENOMEM;
4402
4403 bitmap = sit_i->bitmap;
4404
4405 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4406 sit_i->sentries[start].cur_valid_map = bitmap;
4407 bitmap += SIT_VBLOCK_MAP_SIZE;
4408
4409 sit_i->sentries[start].ckpt_valid_map = bitmap;
4410 bitmap += SIT_VBLOCK_MAP_SIZE;
4411
4412 #ifdef CONFIG_F2FS_CHECK_FS
4413 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4414 bitmap += SIT_VBLOCK_MAP_SIZE;
4415 #endif
4416
4417 if (discard_map) {
4418 sit_i->sentries[start].discard_map = bitmap;
4419 bitmap += SIT_VBLOCK_MAP_SIZE;
4420 }
4421 }
4422
4423 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4424 if (!sit_i->tmp_map)
4425 return -ENOMEM;
4426
4427 if (__is_large_section(sbi)) {
4428 sit_i->sec_entries =
4429 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4430 MAIN_SECS(sbi)),
4431 GFP_KERNEL);
4432 if (!sit_i->sec_entries)
4433 return -ENOMEM;
4434 }
4435
4436 /* get information related to SIT */
4437 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4438
4439 /* set up the SIT bitmap from the checkpoint pack */
4440 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4441 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4442
4443 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4444 if (!sit_i->sit_bitmap)
4445 return -ENOMEM;
4446
4447 #ifdef CONFIG_F2FS_CHECK_FS
4448 sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4449 sit_bitmap_size, GFP_KERNEL);
4450 if (!sit_i->sit_bitmap_mir)
4451 return -ENOMEM;
4452
4453 sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4454 main_bitmap_size, GFP_KERNEL);
4455 if (!sit_i->invalid_segmap)
4456 return -ENOMEM;
4457 #endif
4458
4459 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4460 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4461 sit_i->written_valid_blocks = 0;
4462 sit_i->bitmap_size = sit_bitmap_size;
4463 sit_i->dirty_sentries = 0;
4464 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4465 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4466 sit_i->mounted_time = ktime_get_boottime_seconds();
4467 init_rwsem(&sit_i->sentry_lock);
4468 return 0;
4469 }
4470
4471 static int build_free_segmap(struct f2fs_sb_info *sbi)
4472 {
4473 struct free_segmap_info *free_i;
4474 unsigned int bitmap_size, sec_bitmap_size;
4475
4476 /* allocate memory for free segmap information */
4477 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4478 if (!free_i)
4479 return -ENOMEM;
4480
4481 SM_I(sbi)->free_info = free_i;
4482
4483 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4484 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4485 if (!free_i->free_segmap)
4486 return -ENOMEM;
4487
4488 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4489 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4490 if (!free_i->free_secmap)
4491 return -ENOMEM;
4492
4493 /* set all segments as dirty temporarily */
4494 memset(free_i->free_segmap, 0xff, bitmap_size);
4495 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4496
4497 /* init free segmap information */
4498 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4499 free_i->free_segments = 0;
4500 free_i->free_sections = 0;
4501 spin_lock_init(&free_i->segmap_lock);
4502 return 0;
4503 }
4504
4505 static int build_curseg(struct f2fs_sb_info *sbi)
4506 {
4507 struct curseg_info *array;
4508 int i;
4509
4510 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4511 sizeof(*array)), GFP_KERNEL);
4512 if (!array)
4513 return -ENOMEM;
4514
4515 SM_I(sbi)->curseg_array = array;
4516
4517 for (i = 0; i < NO_CHECK_TYPE; i++) {
4518 mutex_init(&array[i].curseg_mutex);
4519 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4520 if (!array[i].sum_blk)
4521 return -ENOMEM;
4522 init_rwsem(&array[i].journal_rwsem);
4523 array[i].journal = f2fs_kzalloc(sbi,
4524 sizeof(struct f2fs_journal), GFP_KERNEL);
4525 if (!array[i].journal)
4526 return -ENOMEM;
4527 if (i < NR_PERSISTENT_LOG)
4528 array[i].seg_type = CURSEG_HOT_DATA + i;
4529 else if (i == CURSEG_COLD_DATA_PINNED)
4530 array[i].seg_type = CURSEG_COLD_DATA;
4531 else if (i == CURSEG_ALL_DATA_ATGC)
4532 array[i].seg_type = CURSEG_COLD_DATA;
4533 array[i].segno = NULL_SEGNO;
4534 array[i].next_blkoff = 0;
4535 array[i].inited = false;
4536 }
4537 return restore_curseg_summaries(sbi);
4538 }
4539
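/*
 * Load all SIT entries from the on-disk SIT blocks (with readahead) and then
 * overlay the newer entries kept in the SIT journal, validating block counts
 * and segment types; finally cross-check the totals against the checkpointed
 * node and user block counts.
 */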
4540 static int build_sit_entries(struct f2fs_sb_info *sbi)
4541 {
4542 struct sit_info *sit_i = SIT_I(sbi);
4543 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4544 struct f2fs_journal *journal = curseg->journal;
4545 struct seg_entry *se;
4546 struct f2fs_sit_entry sit;
4547 int sit_blk_cnt = SIT_BLK_CNT(sbi);
4548 unsigned int i, start, end;
4549 unsigned int readed, start_blk = 0;
4550 int err = 0;
4551 block_t sit_valid_blocks[2] = {0, 0};
4552
4553 do {
4554 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4555 META_SIT, true);
4556
4557 start = start_blk * sit_i->sents_per_block;
4558 end = (start_blk + readed) * sit_i->sents_per_block;
4559
4560 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4561 struct f2fs_sit_block *sit_blk;
4562 struct page *page;
4563
4564 se = &sit_i->sentries[start];
4565 page = get_current_sit_page(sbi, start);
4566 if (IS_ERR(page))
4567 return PTR_ERR(page);
4568 sit_blk = (struct f2fs_sit_block *)page_address(page);
4569 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4570 f2fs_put_page(page, 1);
4571
4572 err = check_block_count(sbi, start, &sit);
4573 if (err)
4574 return err;
4575 seg_info_from_raw_sit(se, &sit);
4576
4577 if (se->type >= NR_PERSISTENT_LOG) {
4578 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4579 se->type, start);
4580 f2fs_handle_error(sbi,
4581 ERROR_INCONSISTENT_SUM_TYPE);
4582 return -EFSCORRUPTED;
4583 }
4584
4585 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4586
4587 if (f2fs_block_unit_discard(sbi)) {
4588 /* build discard map only one time */
4589 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4590 memset(se->discard_map, 0xff,
4591 SIT_VBLOCK_MAP_SIZE);
4592 } else {
4593 memcpy(se->discard_map,
4594 se->cur_valid_map,
4595 SIT_VBLOCK_MAP_SIZE);
4596 sbi->discard_blks +=
4597 sbi->blocks_per_seg -
4598 se->valid_blocks;
4599 }
4600 }
4601
4602 if (__is_large_section(sbi))
4603 get_sec_entry(sbi, start)->valid_blocks +=
4604 se->valid_blocks;
4605 }
4606 start_blk += readed;
4607 } while (start_blk < sit_blk_cnt);
4608
4609 down_read(&curseg->journal_rwsem);
4610 for (i = 0; i < sits_in_cursum(journal); i++) {
4611 unsigned int old_valid_blocks;
4612
4613 start = le32_to_cpu(segno_in_journal(journal, i));
4614 if (start >= MAIN_SEGS(sbi)) {
4615 f2fs_err(sbi, "Wrong journal entry on segno %u",
4616 start);
4617 err = -EFSCORRUPTED;
4618 f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4619 break;
4620 }
4621
4622 se = &sit_i->sentries[start];
4623 sit = sit_in_journal(journal, i);
4624
4625 old_valid_blocks = se->valid_blocks;
4626
4627 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4628
4629 err = check_block_count(sbi, start, &sit);
4630 if (err)
4631 break;
4632 seg_info_from_raw_sit(se, &sit);
4633
4634 if (se->type >= NR_PERSISTENT_LOG) {
4635 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4636 se->type, start);
4637 err = -EFSCORRUPTED;
4638 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4639 break;
4640 }
4641
4642 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4643
4644 if (f2fs_block_unit_discard(sbi)) {
4645 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4646 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4647 } else {
4648 memcpy(se->discard_map, se->cur_valid_map,
4649 SIT_VBLOCK_MAP_SIZE);
4650 sbi->discard_blks += old_valid_blocks;
4651 sbi->discard_blks -= se->valid_blocks;
4652 }
4653 }
4654
4655 if (__is_large_section(sbi)) {
4656 get_sec_entry(sbi, start)->valid_blocks +=
4657 se->valid_blocks;
4658 get_sec_entry(sbi, start)->valid_blocks -=
4659 old_valid_blocks;
4660 }
4661 }
4662 up_read(&curseg->journal_rwsem);
4663
4664 if (err)
4665 return err;
4666
4667 if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4668 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4669 sit_valid_blocks[NODE], valid_node_count(sbi));
4670 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4671 return -EFSCORRUPTED;
4672 }
4673
4674 if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4675 valid_user_blocks(sbi)) {
4676 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4677 sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4678 valid_user_blocks(sbi));
4679 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4680 return -EFSCORRUPTED;
4681 }
4682
4683 return 0;
4684 }
4685
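/*
 * Mark segments with no valid blocks as free, account written_valid_blocks
 * for the rest, and mark the current segments as in use.
 */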
4686 static void init_free_segmap(struct f2fs_sb_info *sbi)
4687 {
4688 unsigned int start;
4689 int type;
4690 struct seg_entry *sentry;
4691
4692 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4693 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4694 continue;
4695 sentry = get_seg_entry(sbi, start);
4696 if (!sentry->valid_blocks)
4697 __set_free(sbi, start);
4698 else
4699 SIT_I(sbi)->written_valid_blocks +=
4700 sentry->valid_blocks;
4701 }
4702
4703 /* mark the current segments as in use */
4704 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4705 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4706
4707 __set_test_and_inuse(sbi, curseg_t->segno);
4708 }
4709 }
4710
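/*
 * Mark partially valid in-use segments dirty; for large sections, also mark
 * partially valid, non-current sections in dirty_secmap.
 */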
4711 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4712 {
4713 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4714 struct free_segmap_info *free_i = FREE_I(sbi);
4715 unsigned int segno = 0, offset = 0, secno;
4716 block_t valid_blocks, usable_blks_in_seg;
4717
4718 while (1) {
4719 /* find dirty segment based on free segmap */
4720 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4721 if (segno >= MAIN_SEGS(sbi))
4722 break;
4723 offset = segno + 1;
4724 valid_blocks = get_valid_blocks(sbi, segno, false);
4725 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4726 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4727 continue;
4728 if (valid_blocks > usable_blks_in_seg) {
4729 f2fs_bug_on(sbi, 1);
4730 continue;
4731 }
4732 mutex_lock(&dirty_i->seglist_lock);
4733 __locate_dirty_segment(sbi, segno, DIRTY);
4734 mutex_unlock(&dirty_i->seglist_lock);
4735 }
4736
4737 if (!__is_large_section(sbi))
4738 return;
4739
4740 mutex_lock(&dirty_i->seglist_lock);
4741 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4742 valid_blocks = get_valid_blocks(sbi, segno, true);
4743 secno = GET_SEC_FROM_SEG(sbi, segno);
4744
4745 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4746 continue;
4747 if (IS_CURSEC(sbi, secno))
4748 continue;
4749 set_bit(secno, dirty_i->dirty_secmap);
4750 }
4751 mutex_unlock(&dirty_i->seglist_lock);
4752 }
4753
4754 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4755 {
4756 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4757 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4758
4759 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4760 if (!dirty_i->victim_secmap)
4761 return -ENOMEM;
4762
4763 dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4764 if (!dirty_i->pinned_secmap)
4765 return -ENOMEM;
4766
4767 dirty_i->pinned_secmap_cnt = 0;
4768 dirty_i->enable_pin_section = true;
4769 return 0;
4770 }
4771
4772 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4773 {
4774 struct dirty_seglist_info *dirty_i;
4775 unsigned int bitmap_size, i;
4776
4777 /* allocate memory for dirty segments list information */
4778 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4779 GFP_KERNEL);
4780 if (!dirty_i)
4781 return -ENOMEM;
4782
4783 SM_I(sbi)->dirty_info = dirty_i;
4784 mutex_init(&dirty_i->seglist_lock);
4785
4786 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4787
4788 for (i = 0; i < NR_DIRTY_TYPE; i++) {
4789 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4790 GFP_KERNEL);
4791 if (!dirty_i->dirty_segmap[i])
4792 return -ENOMEM;
4793 }
4794
4795 if (__is_large_section(sbi)) {
4796 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4797 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4798 bitmap_size, GFP_KERNEL);
4799 if (!dirty_i->dirty_secmap)
4800 return -ENOMEM;
4801 }
4802
4803 init_dirty_segmap(sbi);
4804 return init_victim_secmap(sbi);
4805 }
4806
4807 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4808 {
4809 int i;
4810
4811 /*
4812 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4813 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
4814 */
4815 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4816 struct curseg_info *curseg = CURSEG_I(sbi, i);
4817 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4818 unsigned int blkofs = curseg->next_blkoff;
4819
4820 if (f2fs_sb_has_readonly(sbi) &&
4821 i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4822 continue;
4823
4824 sanity_check_seg_type(sbi, curseg->seg_type);
4825
4826 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4827 f2fs_err(sbi,
4828 "Current segment has invalid alloc_type:%d",
4829 curseg->alloc_type);
4830 f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4831 return -EFSCORRUPTED;
4832 }
4833
4834 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4835 goto out;
4836
4837 if (curseg->alloc_type == SSR)
4838 continue;
4839
4840 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4841 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4842 continue;
4843 out:
4844 f2fs_err(sbi,
4845 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4846 i, curseg->segno, curseg->alloc_type,
4847 curseg->next_blkoff, blkofs);
4848 f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4849 return -EFSCORRUPTED;
4850 }
4851 }
4852 return 0;
4853 }
4854
4855 #ifdef CONFIG_BLK_DEV_ZONED
4856
4857 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4858 struct f2fs_dev_info *fdev,
4859 struct blk_zone *zone)
4860 {
4861 unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4862 block_t zone_block, wp_block, last_valid_block;
4863 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4864 int i, s, b, ret;
4865 struct seg_entry *se;
4866
4867 if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4868 return 0;
4869
4870 wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4871 wp_segno = GET_SEGNO(sbi, wp_block);
4872 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4873 zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4874 zone_segno = GET_SEGNO(sbi, zone_block);
4875 zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4876
4877 if (zone_segno >= MAIN_SEGS(sbi))
4878 return 0;
4879
4880 /*
4881 * Skip checking the zones that cursegs point to, since
4882 * fix_curseg_write_pointer() checks them.
4883 */
4884 for (i = 0; i < NO_CHECK_TYPE; i++)
4885 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4886 CURSEG_I(sbi, i)->segno))
4887 return 0;
4888
4889 /*
4890 * Get last valid block of the zone.
4891 */
4892 last_valid_block = zone_block - 1;
4893 for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4894 segno = zone_segno + s;
4895 se = get_seg_entry(sbi, segno);
4896 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4897 if (f2fs_test_bit(b, se->cur_valid_map)) {
4898 last_valid_block = START_BLOCK(sbi, segno) + b;
4899 break;
4900 }
4901 if (last_valid_block >= zone_block)
4902 break;
4903 }
4904
4905 /*
4906 * The write pointer matches the valid blocks or
4907 * already points to the end of the zone.
4908 */
4909 if ((last_valid_block + 1 == wp_block) ||
4910 (zone->wp == zone->start + zone->len))
4911 return 0;
4912
4913 if (last_valid_block + 1 == zone_block) {
4914 /*
4915 * If there is no valid block in the zone and if write pointer
4916 * is not at zone start, reset the write pointer.
4917 */
4918 f2fs_notice(sbi,
4919 "Zone without valid block has non-zero write "
4920 "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4921 wp_segno, wp_blkoff);
4922 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4923 zone->len >> log_sectors_per_block);
4924 if (ret)
4925 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4926 fdev->path, ret);
4927
4928 return ret;
4929 }
4930
4931 /*
4932 * If there are valid blocks and the write pointer doesn't
4933 * match them, we need to report the inconsistency and fill
4934 * the zone till the end to close it. This inconsistency does
4935 * not cause a write error because the zone will not be selected
4936 * for a write operation until it gets discarded.
4937 */
4938 f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
4939 "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4940 GET_SEGNO(sbi, last_valid_block),
4941 GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4942 wp_segno, wp_blkoff);
4943
4944 ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
4945 zone->start, zone->len, GFP_NOFS);
4946 if (ret == -EOPNOTSUPP) {
4947 ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
4948 zone->len - (zone->wp - zone->start),
4949 GFP_NOFS, 0);
4950 if (ret)
4951 f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
4952 fdev->path, ret);
4953 } else if (ret) {
4954 f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
4955 fdev->path, ret);
4956 }
4957
4958 return ret;
4959 }
4960
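/*
 * Return the zoned device whose block range contains @zone_blkaddr, or NULL
 * if the address maps to a non-zoned device.
 */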
4961 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4962 block_t zone_blkaddr)
4963 {
4964 int i;
4965
4966 for (i = 0; i < sbi->s_ndevs; i++) {
4967 if (!bdev_is_zoned(FDEV(i).bdev))
4968 continue;
4969 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4970 zone_blkaddr <= FDEV(i).end_blk))
4971 return &FDEV(i);
4972 }
4973
4974 return NULL;
4975 }
4976
4977 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4978 void *data)
4979 {
4980 memcpy(data, zone, sizeof(struct blk_zone));
4981 return 0;
4982 }
4983
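/*
 * If curseg @type disagrees with its zone's write pointer, move the log to a
 * freshly allocated section, verify the old zone's write pointer, and reset
 * the newly assigned zone if it is not empty.
 */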
4984 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4985 {
4986 struct curseg_info *cs = CURSEG_I(sbi, type);
4987 struct f2fs_dev_info *zbd;
4988 struct blk_zone zone;
4989 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4990 block_t cs_zone_block, wp_block;
4991 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4992 sector_t zone_sector;
4993 int err;
4994
4995 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4996 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4997
4998 zbd = get_target_zoned_dev(sbi, cs_zone_block);
4999 if (!zbd)
5000 return 0;
5001
5002 /* report zone for the sector the curseg points to */
5003 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5004 << log_sectors_per_block;
5005 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5006 report_one_zone_cb, &zone);
5007 if (err != 1) {
5008 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5009 zbd->path, err);
5010 return err;
5011 }
5012
5013 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5014 return 0;
5015
5016 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
5017 wp_segno = GET_SEGNO(sbi, wp_block);
5018 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
5019 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
5020
5021 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5022 wp_sector_off == 0)
5023 return 0;
5024
5025 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5026 "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
5027 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
5028
5029 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5030 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
5031
5032 f2fs_allocate_new_section(sbi, type, true);
5033
5034 /* check consistency of the zone the curseg pointed to */
5035 if (check_zone_write_pointer(sbi, zbd, &zone))
5036 return -EIO;
5037
5038 /* check newly assigned zone */
5039 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5040 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5041
5042 zbd = get_target_zoned_dev(sbi, cs_zone_block);
5043 if (!zbd)
5044 return 0;
5045
5046 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5047 << log_sectors_per_block;
5048 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5049 report_one_zone_cb, &zone);
5050 if (err != 1) {
5051 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5052 zbd->path, err);
5053 return err;
5054 }
5055
5056 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5057 return 0;
5058
5059 if (zone.wp != zone.start) {
5060 f2fs_notice(sbi,
5061 "New zone for curseg[%d] is not yet discarded. "
5062 "Reset the zone: curseg[0x%x,0x%x]",
5063 type, cs->segno, cs->next_blkoff);
5064 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5065 zone.len >> log_sectors_per_block);
5066 if (err) {
5067 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5068 zbd->path, err);
5069 return err;
5070 }
5071 }
5072
5073 return 0;
5074 }
5075
5076 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5077 {
5078 int i, ret;
5079
5080 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5081 ret = fix_curseg_write_pointer(sbi, i);
5082 if (ret)
5083 return ret;
5084 }
5085
5086 return 0;
5087 }
5088
5089 struct check_zone_write_pointer_args {
5090 struct f2fs_sb_info *sbi;
5091 struct f2fs_dev_info *fdev;
5092 };
5093
5094 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5095 void *data)
5096 {
5097 struct check_zone_write_pointer_args *args;
5098
5099 args = (struct check_zone_write_pointer_args *)data;
5100
5101 return check_zone_write_pointer(args->sbi, args->fdev, zone);
5102 }
5103
5104 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5105 {
5106 int i, ret;
5107 struct check_zone_write_pointer_args args;
5108
5109 for (i = 0; i < sbi->s_ndevs; i++) {
5110 if (!bdev_is_zoned(FDEV(i).bdev))
5111 continue;
5112
5113 args.sbi = sbi;
5114 args.fdev = &FDEV(i);
5115 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5116 check_zone_write_pointer_cb, &args);
5117 if (ret < 0)
5118 return ret;
5119 }
5120
5121 return 0;
5122 }
5123
5124 /*
5125 * Return the number of usable blocks in a segment. The number of blocks
5126 * returned is always equal to the number of blocks in a segment for
5127 * segments fully contained within a sequential zone capacity or a
5128 * conventional zone. For segments partially contained in a sequential
5129 * zone capacity, the number of usable blocks up to the zone capacity
5130 * is returned. 0 is returned in all other cases.
5131 */
5132 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5133 struct f2fs_sb_info *sbi, unsigned int segno)
5134 {
5135 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5136 unsigned int secno;
5137
5138 if (!sbi->unusable_blocks_per_sec)
5139 return sbi->blocks_per_seg;
5140
5141 secno = GET_SEC_FROM_SEG(sbi, segno);
5142 seg_start = START_BLOCK(sbi, segno);
5143 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5144 sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5145
5146 /*
5147 * If segment starts before zone capacity and spans beyond
5148 * zone capacity, then usable blocks are from seg start to
5149 * zone capacity. If the segment starts after the zone capacity,
5150 * then there are no usable blocks.
5151 */
5152 if (seg_start >= sec_cap_blkaddr)
5153 return 0;
5154 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5155 return sec_cap_blkaddr - seg_start;
5156
5157 return sbi->blocks_per_seg;
5158 }
5159 #else
5160 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5161 {
5162 return 0;
5163 }
5164
5165 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5166 {
5167 return 0;
5168 }
5169
5170 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5171 unsigned int segno)
5172 {
5173 return 0;
5174 }
5175
5176 #endif
5177 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5178 unsigned int segno)
5179 {
5180 if (f2fs_sb_has_blkzoned(sbi))
5181 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5182
5183 return sbi->blocks_per_seg;
5184 }
5185
5186 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5187 unsigned int segno)
5188 {
5189 if (f2fs_sb_has_blkzoned(sbi))
5190 return CAP_SEGS_PER_SEC(sbi);
5191
5192 return sbi->segs_per_sec;
5193 }
5194
5195 /*
5196 * Update min, max modified time for cost-benefit GC algorithm
5197 */
5198 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5199 {
5200 struct sit_info *sit_i = SIT_I(sbi);
5201 unsigned int segno;
5202
5203 down_write(&sit_i->sentry_lock);
5204
5205 sit_i->min_mtime = ULLONG_MAX;
5206
5207 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5208 unsigned int i;
5209 unsigned long long mtime = 0;
5210
5211 for (i = 0; i < sbi->segs_per_sec; i++)
5212 mtime += get_seg_entry(sbi, segno + i)->mtime;
5213
5214 mtime = div_u64(mtime, sbi->segs_per_sec);
5215
5216 if (sit_i->min_mtime > mtime)
5217 sit_i->min_mtime = mtime;
5218 }
5219 sit_i->max_mtime = get_mtime(sbi, false);
5220 sit_i->dirty_max_mtime = 0;
5221 up_write(&sit_i->sentry_lock);
5222 }
5223
5224 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5225 {
5226 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5227 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5228 struct f2fs_sm_info *sm_info;
5229 int err;
5230
5231 sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5232 if (!sm_info)
5233 return -ENOMEM;
5234
5235 /* init sm info */
5236 sbi->sm_info = sm_info;
5237 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5238 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5239 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5240 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5241 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5242 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5243 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5244 sm_info->rec_prefree_segments = sm_info->main_segments *
5245 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5246 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5247 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5248
5249 if (!f2fs_lfs_mode(sbi))
5250 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5251 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5252 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5253 sm_info->min_seq_blocks = sbi->blocks_per_seg;
5254 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5255 sm_info->min_ssr_sections = reserved_sections(sbi);
5256
5257 INIT_LIST_HEAD(&sm_info->sit_entry_set);
5258
5259 init_f2fs_rwsem(&sm_info->curseg_lock);
5260
5261 err = f2fs_create_flush_cmd_control(sbi);
5262 if (err)
5263 return err;
5264
5265 err = create_discard_cmd_control(sbi);
5266 if (err)
5267 return err;
5268
5269 err = build_sit_info(sbi);
5270 if (err)
5271 return err;
5272 err = build_free_segmap(sbi);
5273 if (err)
5274 return err;
5275 err = build_curseg(sbi);
5276 if (err)
5277 return err;
5278
5279 /* reinit free segmap based on SIT */
5280 err = build_sit_entries(sbi);
5281 if (err)
5282 return err;
5283
5284 init_free_segmap(sbi);
5285 err = build_dirty_segmap(sbi);
5286 if (err)
5287 return err;
5288
5289 err = sanity_check_curseg(sbi);
5290 if (err)
5291 return err;
5292
5293 init_min_max_mtime(sbi);
5294 return 0;
5295 }
5296
5297 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5298 enum dirty_type dirty_type)
5299 {
5300 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5301
5302 mutex_lock(&dirty_i->seglist_lock);
5303 kvfree(dirty_i->dirty_segmap[dirty_type]);
5304 dirty_i->nr_dirty[dirty_type] = 0;
5305 mutex_unlock(&dirty_i->seglist_lock);
5306 }
5307
5308 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5309 {
5310 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5311
5312 kvfree(dirty_i->pinned_secmap);
5313 kvfree(dirty_i->victim_secmap);
5314 }
5315
5316 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5317 {
5318 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5319 int i;
5320
5321 if (!dirty_i)
5322 return;
5323
5324 /* discard pre-free/dirty segments list */
5325 for (i = 0; i < NR_DIRTY_TYPE; i++)
5326 discard_dirty_segmap(sbi, i);
5327
5328 if (__is_large_section(sbi)) {
5329 mutex_lock(&dirty_i->seglist_lock);
5330 kvfree(dirty_i->dirty_secmap);
5331 mutex_unlock(&dirty_i->seglist_lock);
5332 }
5333
5334 destroy_victim_secmap(sbi);
5335 SM_I(sbi)->dirty_info = NULL;
5336 kfree(dirty_i);
5337 }
5338
5339 static void destroy_curseg(struct f2fs_sb_info *sbi)
5340 {
5341 struct curseg_info *array = SM_I(sbi)->curseg_array;
5342 int i;
5343
5344 if (!array)
5345 return;
5346 SM_I(sbi)->curseg_array = NULL;
5347 for (i = 0; i < NR_CURSEG_TYPE; i++) {
5348 kfree(array[i].sum_blk);
5349 kfree(array[i].journal);
5350 }
5351 kfree(array);
5352 }
5353
5354 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5355 {
5356 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5357
5358 if (!free_i)
5359 return;
5360 SM_I(sbi)->free_info = NULL;
5361 kvfree(free_i->free_segmap);
5362 kvfree(free_i->free_secmap);
5363 kfree(free_i);
5364 }
5365
5366 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5367 {
5368 struct sit_info *sit_i = SIT_I(sbi);
5369
5370 if (!sit_i)
5371 return;
5372
5373 if (sit_i->sentries)
5374 kvfree(sit_i->bitmap);
5375 kfree(sit_i->tmp_map);
5376
5377 kvfree(sit_i->sentries);
5378 kvfree(sit_i->sec_entries);
5379 kvfree(sit_i->dirty_sentries_bitmap);
5380
5381 SM_I(sbi)->sit_info = NULL;
5382 kvfree(sit_i->sit_bitmap);
5383 #ifdef CONFIG_F2FS_CHECK_FS
5384 kvfree(sit_i->sit_bitmap_mir);
5385 kvfree(sit_i->invalid_segmap);
5386 #endif
5387 kfree(sit_i);
5388 }
5389
5390 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5391 {
5392 struct f2fs_sm_info *sm_info = SM_I(sbi);
5393
5394 if (!sm_info)
5395 return;
5396 f2fs_destroy_flush_cmd_control(sbi, true);
5397 destroy_discard_cmd_control(sbi);
5398 destroy_dirty_segmap(sbi);
5399 destroy_curseg(sbi);
5400 destroy_free_segmap(sbi);
5401 destroy_sit_info(sbi);
5402 sbi->sm_info = NULL;
5403 kfree(sm_info);
5404 }
5405
5406 int __init f2fs_create_segment_manager_caches(void)
5407 {
5408 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5409 sizeof(struct discard_entry));
5410 if (!discard_entry_slab)
5411 goto fail;
5412
5413 discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5414 sizeof(struct discard_cmd));
5415 if (!discard_cmd_slab)
5416 goto destroy_discard_entry;
5417
5418 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5419 sizeof(struct sit_entry_set));
5420 if (!sit_entry_set_slab)
5421 goto destroy_discard_cmd;
5422
5423 revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5424 sizeof(struct revoke_entry));
5425 if (!revoke_entry_slab)
5426 goto destroy_sit_entry_set;
5427 return 0;
5428
5429 destroy_sit_entry_set:
5430 kmem_cache_destroy(sit_entry_set_slab);
5431 destroy_discard_cmd:
5432 kmem_cache_destroy(discard_cmd_slab);
5433 destroy_discard_entry:
5434 kmem_cache_destroy(discard_entry_slab);
5435 fail:
5436 return -ENOMEM;
5437 }
5438
5439 void f2fs_destroy_segment_manager_caches(void)
5440 {
5441 kmem_cache_destroy(sit_entry_set_slab);
5442 kmem_cache_destroy(discard_cmd_slab);
5443 kmem_cache_destroy(discard_entry_slab);
5444 kmem_cache_destroy(revoke_entry_slab);
5445 }
5446