// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static struct discard_policy dpolicys[MAX_DPOLICY] = {
	{DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
		{{1, 0}, {0, 0}, {0, 0}}},
	{DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL,
		{{1, 0}, {2, 50}, {0, 0}}},
	{DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE,
		{{1, 0}, {2, 50}, {4, 2000}}},
	{DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE,
		{{8, 0}, {8, 0}, {8, 0}}},
	{DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
		{{UINT_MAX, 0}, {0, 0}, {0, 0}}}
};

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of the bits in an unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
unsigned long find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

unsigned long find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

#ifdef CONFIG_F2FS_GRADING_SSR
static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
	u64 valid_blocks = sbi->total_valid_block_count;
	u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
	u64 left_space = (total_blocks - valid_blocks) << 2;
	unsigned int free_segs = free_segments(sbi);
	unsigned int ovp_segments = overprovision_segments(sbi);
	unsigned int lower_limit = 0;
	unsigned int waterline = 0;
	int dirty_sum = node_secs + 2 * dent_secs + imeta_secs;

	if (sbi->hot_cold_params.enable == GRADING_SSR_OFF)
		return f2fs_need_SSR(sbi);
	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA &&
			free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2)
		return false;
	if (free_sections(sbi) <= (unsigned int)(dirty_sum + 2 * reserved_sections(sbi)))
		return true;
	if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_MIN_BLKS_LIMIT)
		return false;

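	/* exclude the over-provisioned area from the remaining-space estimate */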
	left_space -= ovp_segments * KBS_PER_SEGMENT;
	if (unlikely(left_space == 0))
		return false;

	switch (type) {
	case CURSEG_HOT_DATA:
		lower_limit = sbi->hot_cold_params.hot_data_lower_limit;
		waterline = sbi->hot_cold_params.hot_data_waterline;
		break;
	case CURSEG_WARM_DATA:
		lower_limit = sbi->hot_cold_params.warm_data_lower_limit;
		waterline = sbi->hot_cold_params.warm_data_waterline;
		break;
	case CURSEG_HOT_NODE:
		lower_limit = sbi->hot_cold_params.hot_node_lower_limit;
		waterline = sbi->hot_cold_params.hot_node_waterline;
		break;
	case CURSEG_WARM_NODE:
		lower_limit = sbi->hot_cold_params.warm_node_lower_limit;
		waterline = sbi->hot_cold_params.warm_node_waterline;
		break;
	default:
		return false;
	}

	if (left_space > lower_limit)
		return false;

	if (div_u64((free_segs - ovp_segments) * 100, (left_space / KBS_PER_SEGMENT))
			<= waterline) {
		trace_f2fs_grading_ssr_allocate(
			(le64_to_cpu(sbi->raw_super->block_count) - sbi->total_valid_block_count),
			free_segments(sbi), contig_level);
		return true;
	} else {
		return false;
	}
}
#endif

void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	get_page(page);
	mutex_lock(&F2FS_I(inode)->inmem_lock);
	list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&F2FS_I(inode)->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover,
				bool trylock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		if (trylock) {
			/*
			 * to avoid deadlock between the page lock and
			 * inmem_lock.
			 */
			if (!trylock_page(page))
				continue;
		} else {
			lock_page(page);
		}

		f2fs_wait_on_page_writeback(page, DATA, true, true);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
			set_new_dnode(&dn, inode, NULL, NULL, 0);
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
					cond_resched();
					goto retry;
				}
				err = -EAGAIN;
				goto next;
			}

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

			if (cur->old_addr == NEW_ADDR) {
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful status */
		if (drop || recover) {
			ClearPageUptodate(page);
			clear_cold_data(page);
		}
		f2fs_clear_page_private(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	unsigned int count = sbi->atomic_files;
	unsigned int looped = 0;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	if (inode)
		list_move_tail(&fi->inmem_ilist, head);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
		if (gc_failure) {
			if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto skip;
		}
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
		f2fs_drop_inmem_pages(inode);
skip:
		iput(inode);
	}
	congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
	cond_resched();
	if (gc_failure) {
		if (++looped >= count)
			return;
	}
	goto next;
}

void f2fs_drop_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);

	do {
		mutex_lock(&fi->inmem_lock);
		if (list_empty(&fi->inmem_pages)) {
			fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;

			spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
			if (!list_empty(&fi->inmem_ilist))
				list_del_init(&fi->inmem_ilist);
			if (f2fs_is_atomic_file(inode)) {
				clear_inode_flag(inode, FI_ATOMIC_FILE);
				sbi->atomic_files--;
			}
			spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

			mutex_unlock(&fi->inmem_lock);
			break;
		}
		__revoke_inmem_pages(inode, &fi->inmem_pages,
						true, false, true);
		mutex_unlock(&fi->inmem_lock);
	} while (1);
}

void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;
	struct inmem_pages *tmp;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	/* find the entry registered for this atomic page */
	list_for_each_entry(tmp, head, list) {
		if (tmp->page == page) {
			cur = tmp;
			break;
		}
	}

	f2fs_bug_on(sbi, !cur);
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	f2fs_clear_page_private(page);
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

static int __f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.io_type = FS_DATA_IO,
	};
	struct list_head revoke_list;
	bool submit_bio = false;
	int err = 0;

	INIT_LIST_HEAD(&revoke_list);

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			f2fs_wait_on_page_writeback(page, DATA, true, true);

			set_page_dirty(page);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				f2fs_remove_dirty_inode(inode);
			}
retry:
			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = LOCK_DONE;
			err = f2fs_do_write_data_page(&fio);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
					cond_resched();
					goto retry;
				}
				unlock_page(page);
				break;
			}
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, &revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);

	if (err) {
		/*
		 * try to revoke all committed pages, but we could still fail
		 * due to lack of memory or some other reason. If that happens,
		 * EAGAIN is returned, which means the transaction is no longer
		 * consistent and the caller should use the journal to recover,
		 * or rewrite and commit the last transaction. For any other
		 * error number, the revoking was done by the filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list,
						false, true, false);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages,
						true, false, false);
	} else {
		__revoke_inmem_pages(inode, &revoke_list,
						false, false, false);
	}

	return err;
}

int f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	f2fs_balance_fs(sbi, true);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __f2fs_commit_inmem_pages(inode);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false);
	}

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
					sbi->gc_thread->f2fs_gc_task) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
						TASK_UNINTERRUPTIBLE);
			wake_up(&sbi->gc_thread->gc_wait_queue_head);
			io_schedule();
			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
		} else {
			down_write(&sbi->gc_lock);
			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
		}
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink the extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
		excess_prefree_segs(sbi))
		goto do_sync;

	/* there is background inflight IO or foreground operation recently */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceed periodical checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
			f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	f2fs_sync_fs(sbi->sb, true);
	stat_inc_bg_cp_count(sbi->stat_info);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	struct bio *bio;
	int ret;

	bio = f2fs_bio_alloc(sbi, 0, false);
	if (!bio)
		return -ENOMEM;

	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
	bio_set_dev(bio, bdev);
	ret = submit_bio_wait(bio);
	bio_put(bio);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Flush-merge daemon: drain all queued flush commands, issue a single
 * preflush on their behalf, and complete each waiter with the shared result.
 */
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	sb_start_intwrite(sbi->sb);

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	sb_end_intwrite(sbi->sb);

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
			f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/* update issue_list before we wake up issue_flush thread */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return err;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

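/* Stop the flush-merge thread and, if requested, free its control structure. */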
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi, unlikely(!valid_blocks ||
					valid_blocks == BLKS_PER_SEC(sbi)));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

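	/* only the part of the larger hole that exceeds overprovision is unusable */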
	unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));

	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);

	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, sbi->sb->s_id,
			dc->lstart, dc->start, dc->len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

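/*
 * Discard bio completion: record the result and, once the last split bio
 * for this command finishes, mark it D_DONE and wake up any waiters.
 */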
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *policy,
				int discard_type, unsigned int granularity)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (discard_type == DPOLICY_BG) {
		*policy = dpolicys[DPOLICY_BG];
	} else if (discard_type == DPOLICY_BALANCE) {
		*policy = dpolicys[DPOLICY_BALANCE];
	} else if (discard_type == DPOLICY_FORCE) {
		*policy = dpolicys[DPOLICY_FORCE];
	} else if (discard_type == DPOLICY_FSTRIM) {
		*policy = dpolicys[DPOLICY_FSTRIM];
		if (policy->granularity != granularity)
			policy->granularity = granularity;
	} else if (discard_type == DPOLICY_UMOUNT) {
		*policy = dpolicys[DPOLICY_UMOUNT];
	}
	dcc->discard_type = discard_type;
}

static void select_sub_discard_policy(struct discard_sub_policy **spolicy,
				int index, struct discard_policy *dpolicy)
{
	if (dpolicy->type == DPOLICY_FSTRIM) {
		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
		return;
	}

	if ((index + 1) >= DISCARD_GRAN_BG)
		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
	else if ((index + 1) >= DISCARD_GRAN_BL)
		*spolicy = &dpolicy->sub_policy[SUB_POLICY_MID];
	else
		*spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL];
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						int spolicy_index,
						struct discard_cmd *dc,
						unsigned int *issued)
{
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
	struct discard_sub_policy *spolicy = NULL;
	block_t lstart, start, len, total_len;
	int err = 0;

	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < spolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == spolicy->max_requests)
			last = true;

		dc->len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(sbi, FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
submit:
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}

static void __insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
do_insert:
	__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	block_t end = lstart + len;

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
		}
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return 0;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

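	/* translate the logical block address into a per-device block address */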
	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
	return 0;
}

static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy,
					int spolicy_index)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;
	struct discard_sub_policy *spolicy = NULL;

	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
	mutex_lock(&dcc->cmd_lock);

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
		int err = 0;

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
		err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued);

		if (issued >= spolicy->max_requests)
			break;
next:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy);

static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;
	struct discard_sub_policy *spolicy = NULL;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

	/* only do this check in CHECK_FS, as it may be time consuming */
	if (unlikely(dcc->rbtree_check)) {
		mutex_lock(&dcc->cmd_lock);
		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
		mutex_unlock(&dcc->cmd_lock);
	}
retry:
	blk_start_plug(&plug);
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
			break;

		if (i + 1 < dpolicy->granularity)
			break;

		select_sub_discard_policy(&spolicy, i, dpolicy);

		if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) {
			issued = __issue_discard_cmd_orderly(sbi, dpolicy, i);
			blk_finish_plug(&plug);
			return issued;
		}

		pend_list = &dcc->pend_list[i];

		mutex_lock(&dcc->cmd_lock);
		if (list_empty(pend_list))
			goto next;
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root, false));
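		/* try to submit each prepared command on this pending list */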
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
				break;

			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
				goto skip;
			}
			__submit_discard_cmd(sbi, dpolicy, i, dc, &issued);
skip:
			if (issued >= spolicy->max_requests)
				break;
		}
next:
		mutex_unlock(&dcc->cmd_lock);

		if (issued >= spolicy->max_requests || io_interrupted)
			break;
	}

	blk_finish_plug(&plug);
	if (spolicy)
		dpolicy->min_interval = spolicy->interval;

	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
	}

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
	bool dropped = false;

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
			dropped = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned int len = 0;

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
		__remove_discard_cmd(sbi, dc);
	}
	mutex_unlock(&dcc->cmd_lock);

	return len;
}

static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	struct discard_cmd *dc, *tmp;
	bool need_wait;
	unsigned int trimmed = 0;

next:
	need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
		if (dc->lstart + dc->len <= start || end <= dc->lstart)
			continue;
		if (dc->len < dpolicy->granularity)
			continue;
		if (dc->state == D_DONE && !dc->ref) {
			wait_for_completion_io(&dc->wait);
			if (!dc->error)
				trimmed += dc->len;
			__remove_discard_cmd(sbi, dc);
		} else {
			dc->ref++;
			need_wait = true;
			break;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait) {
		trimmed += __wait_one_discard_bio(sbi, dc);
		goto next;
	}

	return trimmed;
}

static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy)
{
	struct discard_policy dp;
	unsigned int discard_blks;

	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);

	/* wait all */
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
}

/* This should be covered by global mutex, &sit_i->sentry_lock */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
	bool need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
	if (dc) {
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
}

void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
	}
}

/* This comes from f2fs_put_super */
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_policy dpolicy;
	bool dropped;

	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0);
	__issue_discard_cmd(sbi, &dpolicy);
	dropped = __drop_discard_cmd(sbi);

	/* just to make sure there are no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
	return dropped;
}

/*
 * Pick a discard policy based on how much free space remains in the
 * filesystem and how much undiscarded space is pending on the device.
 */
static int select_discard_type(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	block_t user_block_count = sbi->user_block_count;
	block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
	block_t fs_available_blocks = user_block_count -
				valid_user_blocks(sbi) + ovp_count;
	int discard_type;

	if (fs_available_blocks >= fs_free_space_threshold(sbi) &&
			fs_available_blocks - dcc->undiscard_blks >=
			device_free_space_threshold(sbi)) {
		discard_type = DPOLICY_BG;
	} else if (fs_available_blocks < fs_free_space_threshold(sbi) &&
			fs_available_blocks - dcc->undiscard_blks <
			device_free_space_threshold(sbi)) {
		discard_type = DPOLICY_FORCE;
	} else {
		discard_type = DPOLICY_BALANCE;
	}
	return discard_type;
}

static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued, discard_type;

	set_freezable();

	do {
		discard_type = select_discard_type(sbi);
		__init_discard_policy(sbi, &dpolicy, discard_type, 0);

		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (try_to_freeze())
			continue;
		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}

		if (sbi->gc_mode == GC_URGENT_HIGH)
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0);

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			wait_ms = dpolicy.max_interval;
		}

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkstart);
		if (blkstart < FDEV(devi).start_blk ||
		    blkstart > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkstart);
			return -EIO;
		}
		blkstart -= FDEV(devi).start_blk;
	}

	/* For sequential zones, reset the zone write pointer */
	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
				 blkstart, blklen);
			return -EIO;
		}
		trace_f2fs_issue_reset_zone(bdev, blkstart);
		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
					sector, nr_sects, GFP_NOFS);
	}

	/* For conventional zones, use regular discard if supported */
	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif

static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}

	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
}

static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
		return false;

	if (!force) {
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ?
			~ckpt_map[i] & ~discard_map[i] :
			(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

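	/* collect contiguous runs of discardable blocks recorded in dmap */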
~ckpt_map[i] & ~discard_map[i] : 2065 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 2066 2067 while (force || SM_I(sbi)->dcc_info->nr_discards <= 2068 SM_I(sbi)->dcc_info->max_discards) { 2069 start = find_rev_next_bit(dmap, max_blocks, end + 1); 2070 if (start >= max_blocks) 2071 break; 2072 2073 end = find_rev_next_zero_bit(dmap, max_blocks, start + 1); 2074 if (force && start && end != max_blocks 2075 && (end - start) < cpc->trim_minlen) 2076 continue; 2077 2078 if (check_only) 2079 return true; 2080 2081 if (!de) { 2082 de = f2fs_kmem_cache_alloc(discard_entry_slab, 2083 GFP_F2FS_ZERO); 2084 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); 2085 list_add_tail(&de->list, head); 2086 } 2087 2088 for (i = start; i < end; i++) 2089 __set_bit_le(i, (void *)de->discard_map); 2090 2091 SM_I(sbi)->dcc_info->nr_discards += end - start; 2092 } 2093 return false; 2094} 2095 2096static void release_discard_addr(struct discard_entry *entry) 2097{ 2098 list_del(&entry->list); 2099 kmem_cache_free(discard_entry_slab, entry); 2100} 2101 2102void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) 2103{ 2104 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); 2105 struct discard_entry *entry, *this; 2106 2107 /* drop caches */ 2108 list_for_each_entry_safe(entry, this, head, list) 2109 release_discard_addr(entry); 2110} 2111 2112/* 2113 * Should call f2fs_clear_prefree_segments after checkpoint is done. 2114 */ 2115static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 2116{ 2117 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2118 unsigned int segno; 2119 2120 mutex_lock(&dirty_i->seglist_lock); 2121 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) 2122 __set_test_and_free(sbi, segno, false); 2123 mutex_unlock(&dirty_i->seglist_lock); 2124} 2125 2126void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, 2127 struct cp_control *cpc) 2128{ 2129 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2130 struct list_head *head = &dcc->entry_list; 2131 struct discard_entry *entry, *this; 2132 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2133 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 2134 unsigned int start = 0, end = -1; 2135 unsigned int secno, start_segno; 2136 bool force = (cpc->reason & CP_DISCARD); 2137 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); 2138 2139 mutex_lock(&dirty_i->seglist_lock); 2140 2141 while (1) { 2142 int i; 2143 2144 if (need_align && end != -1) 2145 end--; 2146 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); 2147 if (start >= MAIN_SEGS(sbi)) 2148 break; 2149 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), 2150 start + 1); 2151 2152 if (need_align) { 2153 start = rounddown(start, sbi->segs_per_sec); 2154 end = roundup(end, sbi->segs_per_sec); 2155 } 2156 2157 for (i = start; i < end; i++) { 2158 if (test_and_clear_bit(i, prefree_map)) 2159 dirty_i->nr_dirty[PRE]--; 2160 } 2161 2162 if (!f2fs_realtime_discard_enable(sbi)) 2163 continue; 2164 2165 if (force && start >= cpc->trim_start && 2166 (end - 1) <= cpc->trim_end) 2167 continue; 2168 2169 if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { 2170 f2fs_issue_discard(sbi, START_BLOCK(sbi, start), 2171 (end - start) << sbi->log_blocks_per_seg); 2172 continue; 2173 } 2174next: 2175 secno = GET_SEC_FROM_SEG(sbi, start); 2176 start_segno = GET_SEG_FROM_SEC(sbi, secno); 2177 if (!IS_CURSEC(sbi, secno) && 2178 !get_valid_blocks(sbi, start, true)) 2179 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), 2180 sbi->segs_per_sec 
<< sbi->log_blocks_per_seg); 2181 2182 start = start_segno + sbi->segs_per_sec; 2183 if (start < end) 2184 goto next; 2185 else 2186 end = start - 1; 2187 } 2188 mutex_unlock(&dirty_i->seglist_lock); 2189 2190 /* send small discards */ 2191 list_for_each_entry_safe(entry, this, head, list) { 2192 unsigned int cur_pos = 0, next_pos, len, total_len = 0; 2193 bool is_valid = test_bit_le(0, entry->discard_map); 2194 2195find_next: 2196 if (is_valid) { 2197 next_pos = find_next_zero_bit_le(entry->discard_map, 2198 sbi->blocks_per_seg, cur_pos); 2199 len = next_pos - cur_pos; 2200 2201 if (f2fs_sb_has_blkzoned(sbi) || 2202 (force && len < cpc->trim_minlen)) 2203 goto skip; 2204 2205 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, 2206 len); 2207 total_len += len; 2208 } else { 2209 next_pos = find_next_bit_le(entry->discard_map, 2210 sbi->blocks_per_seg, cur_pos); 2211 } 2212skip: 2213 cur_pos = next_pos; 2214 is_valid = !is_valid; 2215 2216 if (cur_pos < sbi->blocks_per_seg) 2217 goto find_next; 2218 2219 release_discard_addr(entry); 2220 dcc->nr_discards -= total_len; 2221 } 2222 2223 wake_up_discard_thread(sbi, false); 2224} 2225 2226static int create_discard_cmd_control(struct f2fs_sb_info *sbi) 2227{ 2228 dev_t dev = sbi->sb->s_bdev->bd_dev; 2229 struct discard_cmd_control *dcc; 2230 int err = 0, i; 2231 2232 if (SM_I(sbi)->dcc_info) { 2233 dcc = SM_I(sbi)->dcc_info; 2234 goto init_thread; 2235 } 2236 2237 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); 2238 if (!dcc) 2239 return -ENOMEM; 2240 2241 dcc->discard_granularity = DISCARD_GRAN_BG; 2242 INIT_LIST_HEAD(&dcc->entry_list); 2243 for (i = 0; i < MAX_PLIST_NUM; i++) 2244 INIT_LIST_HEAD(&dcc->pend_list[i]); 2245 INIT_LIST_HEAD(&dcc->wait_list); 2246 INIT_LIST_HEAD(&dcc->fstrim_list); 2247 mutex_init(&dcc->cmd_lock); 2248 atomic_set(&dcc->issued_discard, 0); 2249 atomic_set(&dcc->queued_discard, 0); 2250 atomic_set(&dcc->discard_cmd_cnt, 0); 2251 dcc->nr_discards = 0; 2252 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; 2253 dcc->undiscard_blks = 0; 2254 dcc->next_pos = 0; 2255 dcc->root = RB_ROOT_CACHED; 2256 dcc->rbtree_check = false; 2257 2258 init_waitqueue_head(&dcc->discard_wait_queue); 2259 SM_I(sbi)->dcc_info = dcc; 2260init_thread: 2261 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, 2262 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); 2263 if (IS_ERR(dcc->f2fs_issue_discard)) { 2264 err = PTR_ERR(dcc->f2fs_issue_discard); 2265 kfree(dcc); 2266 SM_I(sbi)->dcc_info = NULL; 2267 return err; 2268 } 2269 2270 return err; 2271} 2272 2273static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) 2274{ 2275 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2276 2277 if (!dcc) 2278 return; 2279 2280 f2fs_stop_discard_thread(sbi); 2281 2282 /* 2283 * Recovery can cache discard commands, so in error path of 2284 * fill_super(), it needs to give a chance to handle them. 
2285 */ 2286 if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) 2287 f2fs_issue_discard_timeout(sbi); 2288 2289 kfree(dcc); 2290 SM_I(sbi)->dcc_info = NULL; 2291} 2292 2293static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 2294{ 2295 struct sit_info *sit_i = SIT_I(sbi); 2296 2297 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { 2298 sit_i->dirty_sentries++; 2299 return false; 2300 } 2301 2302 return true; 2303} 2304 2305static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 2306 unsigned int segno, int modified) 2307{ 2308 struct seg_entry *se = get_seg_entry(sbi, segno); 2309 se->type = type; 2310 if (modified) 2311 __mark_sit_entry_dirty(sbi, segno); 2312} 2313 2314static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi, 2315 block_t blkaddr) 2316{ 2317 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2318 2319 if (segno == NULL_SEGNO) 2320 return 0; 2321 return get_seg_entry(sbi, segno)->mtime; 2322} 2323 2324static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, 2325 unsigned long long old_mtime) 2326{ 2327 struct seg_entry *se; 2328 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2329 unsigned long long ctime = get_mtime(sbi, false); 2330 unsigned long long mtime = old_mtime ? old_mtime : ctime; 2331 2332 if (segno == NULL_SEGNO) 2333 return; 2334 2335 se = get_seg_entry(sbi, segno); 2336 2337 if (!se->mtime) 2338 se->mtime = mtime; 2339 else 2340 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime, 2341 se->valid_blocks + 1); 2342 2343 if (ctime > SIT_I(sbi)->max_mtime) 2344 SIT_I(sbi)->max_mtime = ctime; 2345} 2346 2347static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) 2348{ 2349 struct seg_entry *se; 2350 unsigned int segno, offset; 2351 long int new_vblocks; 2352 bool exist; 2353#ifdef CONFIG_F2FS_CHECK_FS 2354 bool mir_exist; 2355#endif 2356 2357 segno = GET_SEGNO(sbi, blkaddr); 2358 2359 se = get_seg_entry(sbi, segno); 2360 new_vblocks = se->valid_blocks + del; 2361 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2362 2363 f2fs_bug_on(sbi, (new_vblocks < 0 || 2364 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); 2365 2366 se->valid_blocks = new_vblocks; 2367 2368 /* Update valid block bitmap */ 2369 if (del > 0) { 2370 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); 2371#ifdef CONFIG_F2FS_CHECK_FS 2372 mir_exist = f2fs_test_and_set_bit(offset, 2373 se->cur_valid_map_mir); 2374 if (unlikely(exist != mir_exist)) { 2375 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", 2376 blkaddr, exist); 2377 f2fs_bug_on(sbi, 1); 2378 } 2379#endif 2380 if (unlikely(exist)) { 2381 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", 2382 blkaddr); 2383 f2fs_bug_on(sbi, 1); 2384 se->valid_blocks--; 2385 del = 0; 2386 } 2387 2388 if (!f2fs_test_and_set_bit(offset, se->discard_map)) 2389 sbi->discard_blks--; 2390 2391 /* 2392 * SSR should never reuse block which is checkpointed 2393 * or newly invalidated. 
2394 */ 2395 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 2396 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) 2397 se->ckpt_valid_blocks++; 2398 } 2399 } else { 2400 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); 2401#ifdef CONFIG_F2FS_CHECK_FS 2402 mir_exist = f2fs_test_and_clear_bit(offset, 2403 se->cur_valid_map_mir); 2404 if (unlikely(exist != mir_exist)) { 2405 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", 2406 blkaddr, exist); 2407 f2fs_bug_on(sbi, 1); 2408 } 2409#endif 2410 if (unlikely(!exist)) { 2411 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", 2412 blkaddr); 2413 f2fs_bug_on(sbi, 1); 2414 se->valid_blocks++; 2415 del = 0; 2416 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2417 /* 2418 * If checkpoints are off, we must not reuse data that 2419 * was used in the previous checkpoint. If it was used 2420 * before, we must track that to know how much space we 2421 * really have. 2422 */ 2423 if (f2fs_test_bit(offset, se->ckpt_valid_map)) { 2424 spin_lock(&sbi->stat_lock); 2425 sbi->unusable_block_count++; 2426 spin_unlock(&sbi->stat_lock); 2427 } 2428 } 2429 2430 if (f2fs_test_and_clear_bit(offset, se->discard_map)) 2431 sbi->discard_blks++; 2432 } 2433 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 2434 se->ckpt_valid_blocks += del; 2435 2436 __mark_sit_entry_dirty(sbi, segno); 2437 2438 /* update total number of valid blocks to be written in ckpt area */ 2439 SIT_I(sbi)->written_valid_blocks += del; 2440 2441 if (__is_large_section(sbi)) 2442 get_sec_entry(sbi, segno)->valid_blocks += del; 2443} 2444 2445void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 2446{ 2447 unsigned int segno = GET_SEGNO(sbi, addr); 2448 struct sit_info *sit_i = SIT_I(sbi); 2449 2450 f2fs_bug_on(sbi, addr == NULL_ADDR); 2451 if (addr == NEW_ADDR || addr == COMPRESS_ADDR) 2452 return; 2453 2454 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); 2455 2456 /* add it into sit main buffer */ 2457 down_write(&sit_i->sentry_lock); 2458 2459 update_segment_mtime(sbi, addr, 0); 2460 update_sit_entry(sbi, addr, -1); 2461 2462 /* add it into dirty seglist */ 2463 locate_dirty_segment(sbi, segno); 2464 2465 up_write(&sit_i->sentry_lock); 2466} 2467 2468bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) 2469{ 2470 struct sit_info *sit_i = SIT_I(sbi); 2471 unsigned int segno, offset; 2472 struct seg_entry *se; 2473 bool is_cp = false; 2474 2475 if (!__is_valid_data_blkaddr(blkaddr)) 2476 return true; 2477 2478 down_read(&sit_i->sentry_lock); 2479 2480 segno = GET_SEGNO(sbi, blkaddr); 2481 se = get_seg_entry(sbi, segno); 2482 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2483 2484 if (f2fs_test_bit(offset, se->ckpt_valid_map)) 2485 is_cp = true; 2486 2487 up_read(&sit_i->sentry_lock); 2488 2489 return is_cp; 2490} 2491 2492/* 2493 * This function should be resided under the curseg_mutex lock 2494 */ 2495static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, 2496 struct f2fs_summary *sum) 2497{ 2498 struct curseg_info *curseg = CURSEG_I(sbi, type); 2499 void *addr = curseg->sum_blk; 2500 addr += curseg->next_blkoff * sizeof(struct f2fs_summary); 2501 memcpy(addr, sum, sizeof(struct f2fs_summary)); 2502} 2503 2504/* 2505 * Calculate the number of current summary pages for writing 2506 */ 2507int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) 2508{ 2509 int valid_sum_count = 0; 2510 int i, sum_in_page; 2511 2512 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2513 if 
(sbi->ckpt->alloc_type[i] == SSR) 2514 valid_sum_count += sbi->blocks_per_seg; 2515 else { 2516 if (for_ra) 2517 valid_sum_count += le16_to_cpu( 2518 F2FS_CKPT(sbi)->cur_data_blkoff[i]); 2519 else 2520 valid_sum_count += curseg_blkoff(sbi, i); 2521 } 2522 } 2523 2524 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - 2525 SUM_FOOTER_SIZE) / SUMMARY_SIZE; 2526 if (valid_sum_count <= sum_in_page) 2527 return 1; 2528 else if ((valid_sum_count - sum_in_page) <= 2529 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) 2530 return 2; 2531 return 3; 2532} 2533 2534/* 2535 * Caller should put this summary page 2536 */ 2537struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) 2538{ 2539 if (unlikely(f2fs_cp_error(sbi))) 2540 return ERR_PTR(-EIO); 2541 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); 2542} 2543 2544void f2fs_update_meta_page(struct f2fs_sb_info *sbi, 2545 void *src, block_t blk_addr) 2546{ 2547 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2548 2549 memcpy(page_address(page), src, PAGE_SIZE); 2550 set_page_dirty(page); 2551 f2fs_put_page(page, 1); 2552} 2553 2554static void write_sum_page(struct f2fs_sb_info *sbi, 2555 struct f2fs_summary_block *sum_blk, block_t blk_addr) 2556{ 2557 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); 2558} 2559 2560static void write_current_sum_page(struct f2fs_sb_info *sbi, 2561 int type, block_t blk_addr) 2562{ 2563 struct curseg_info *curseg = CURSEG_I(sbi, type); 2564 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2565 struct f2fs_summary_block *src = curseg->sum_blk; 2566 struct f2fs_summary_block *dst; 2567 2568 dst = (struct f2fs_summary_block *)page_address(page); 2569 memset(dst, 0, PAGE_SIZE); 2570 2571 mutex_lock(&curseg->curseg_mutex); 2572 2573 down_read(&curseg->journal_rwsem); 2574 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE); 2575 up_read(&curseg->journal_rwsem); 2576 2577 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE); 2578 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE); 2579 2580 mutex_unlock(&curseg->curseg_mutex); 2581 2582 set_page_dirty(page); 2583 f2fs_put_page(page, 1); 2584} 2585 2586static int is_next_segment_free(struct f2fs_sb_info *sbi, 2587 struct curseg_info *curseg, int type) 2588{ 2589 unsigned int segno = curseg->segno + 1; 2590 struct free_segmap_info *free_i = FREE_I(sbi); 2591 2592 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) 2593 return !test_bit(segno, free_i->free_segmap); 2594 return 0; 2595} 2596 2597/* 2598 * Find a new segment from the free segments bitmap to right order 2599 * This function should be returned with success, otherwise BUG 2600 */ 2601static void get_new_segment(struct f2fs_sb_info *sbi, 2602 unsigned int *newseg, bool new_sec, int dir) 2603{ 2604 struct free_segmap_info *free_i = FREE_I(sbi); 2605 unsigned int segno, secno, zoneno; 2606 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; 2607 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); 2608 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); 2609 unsigned int left_start = hint; 2610 bool init = true; 2611 int go_left = 0; 2612 int i; 2613 2614 spin_lock(&free_i->segmap_lock); 2615 2616 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 2617 segno = find_next_zero_bit(free_i->free_segmap, 2618 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); 2619 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) 2620 goto got_it; 2621 } 2622find_other_zone: 2623 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); 2624 if (secno >= 
MAIN_SECS(sbi)) {
2625 		if (dir == ALLOC_RIGHT) {
2626 			secno = find_next_zero_bit(free_i->free_secmap,
2627 							MAIN_SECS(sbi), 0);
2628 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2629 		} else {
2630 			go_left = 1;
2631 			left_start = hint - 1;
2632 		}
2633 	}
2634 	if (go_left == 0)
2635 		goto skip_left;
2636 
2637 	while (test_bit(left_start, free_i->free_secmap)) {
2638 		if (left_start > 0) {
2639 			left_start--;
2640 			continue;
2641 		}
2642 		left_start = find_next_zero_bit(free_i->free_secmap,
2643 							MAIN_SECS(sbi), 0);
2644 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2645 		break;
2646 	}
2647 	secno = left_start;
2648 skip_left:
2649 	segno = GET_SEG_FROM_SEC(sbi, secno);
2650 	zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2651 
2652 	/* give up on finding another zone */
2653 	if (!init)
2654 		goto got_it;
2655 	if (sbi->secs_per_zone == 1)
2656 		goto got_it;
2657 	if (zoneno == old_zoneno)
2658 		goto got_it;
2659 	if (dir == ALLOC_LEFT) {
2660 		if (!go_left && zoneno + 1 >= total_zones)
2661 			goto got_it;
2662 		if (go_left && zoneno == 0)
2663 			goto got_it;
2664 	}
2665 	for (i = 0; i < NR_CURSEG_TYPE; i++)
2666 		if (CURSEG_I(sbi, i)->zone == zoneno)
2667 			break;
2668 
2669 	if (i < NR_CURSEG_TYPE) {
2670 		/* zone is in use, try another */
2671 		if (go_left)
2672 			hint = zoneno * sbi->secs_per_zone - 1;
2673 		else if (zoneno + 1 >= total_zones)
2674 			hint = 0;
2675 		else
2676 			hint = (zoneno + 1) * sbi->secs_per_zone;
2677 		init = false;
2678 		goto find_other_zone;
2679 	}
2680 got_it:
2681 	/* set it as dirty segment in free segmap */
2682 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2683 	__set_inuse(sbi, segno);
2684 	*newseg = segno;
2685 	spin_unlock(&free_i->segmap_lock);
2686 }
2687 
2688 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2689 {
2690 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2691 	struct summary_footer *sum_footer;
2692 	unsigned short seg_type = curseg->seg_type;
2693 
2694 	curseg->inited = true;
2695 	curseg->segno = curseg->next_segno;
2696 	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2697 	curseg->next_blkoff = 0;
2698 	curseg->next_segno = NULL_SEGNO;
2699 
2700 	sum_footer = &(curseg->sum_blk->footer);
2701 	memset(sum_footer, 0, sizeof(struct summary_footer));
2702 
2703 	sanity_check_seg_type(sbi, seg_type);
2704 
2705 	if (IS_DATASEG(seg_type))
2706 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2707 	if (IS_NODESEG(seg_type))
2708 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2709 	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2710 }
2711 
2712 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2713 {
2714 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2715 	unsigned short seg_type = curseg->seg_type;
2716 
2717 	sanity_check_seg_type(sbi, seg_type);
2718 
2719 	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
2720 	if (__is_large_section(sbi))
2721 		return curseg->segno;
2722 
2723 	/* inmem log may not be located on any segment after mount */
2724 	if (!curseg->inited)
2725 		return 0;
2726 
2727 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2728 		return 0;
2729 
2730 	if (test_opt(sbi, NOHEAP) &&
2731 		(seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2732 		return 0;
2733 
2734 	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2735 		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2736 
2737 	/* find segments from 0 to reuse freed segments */
2738 	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2739 		return 0;
2740 
2741 	return curseg->segno;
2742 }
2743 
2744 /*
2745  * Allocate a current working segment.
2746 * This function always allocates a free segment in LFS manner. 2747 */ 2748static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) 2749{ 2750 struct curseg_info *curseg = CURSEG_I(sbi, type); 2751 unsigned short seg_type = curseg->seg_type; 2752 unsigned int segno = curseg->segno; 2753 int dir = ALLOC_LEFT; 2754 2755 if (curseg->inited) 2756 write_sum_page(sbi, curseg->sum_blk, 2757 GET_SUM_BLOCK(sbi, segno)); 2758 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) 2759 dir = ALLOC_RIGHT; 2760 2761 if (test_opt(sbi, NOHEAP)) 2762 dir = ALLOC_RIGHT; 2763 2764 segno = __get_next_segno(sbi, type); 2765 get_new_segment(sbi, &segno, new_sec, dir); 2766 curseg->next_segno = segno; 2767 reset_curseg(sbi, type, 1); 2768 curseg->alloc_type = LFS; 2769} 2770 2771static void __next_free_blkoff(struct f2fs_sb_info *sbi, 2772 struct curseg_info *seg, block_t start) 2773{ 2774 struct seg_entry *se = get_seg_entry(sbi, seg->segno); 2775 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 2776 unsigned long *target_map = SIT_I(sbi)->tmp_map; 2777 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 2778 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 2779 int i, pos; 2780 2781 for (i = 0; i < entries; i++) 2782 target_map[i] = ckpt_map[i] | cur_map[i]; 2783 2784 pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); 2785 2786 seg->next_blkoff = pos; 2787} 2788 2789/* 2790 * If a segment is written by LFS manner, next block offset is just obtained 2791 * by increasing the current block offset. However, if a segment is written by 2792 * SSR manner, next block offset obtained by calling __next_free_blkoff 2793 */ 2794static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, 2795 struct curseg_info *seg) 2796{ 2797 if (seg->alloc_type == SSR) 2798 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); 2799 else 2800 seg->next_blkoff++; 2801} 2802 2803bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) 2804{ 2805 struct seg_entry *se = get_seg_entry(sbi, segno); 2806 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 2807 unsigned long *target_map = SIT_I(sbi)->tmp_map; 2808 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 2809 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 2810 int i, pos; 2811 2812 for (i = 0; i < entries; i++) 2813 target_map[i] = ckpt_map[i] | cur_map[i]; 2814 2815 pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0); 2816 2817 return pos < sbi->blocks_per_seg; 2818} 2819 2820/* 2821 * This function always allocates a used segment(from dirty seglist) by SSR 2822 * manner, so it should recover the existing segment information of valid blocks 2823 */ 2824static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) 2825{ 2826 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2827 struct curseg_info *curseg = CURSEG_I(sbi, type); 2828 unsigned int new_segno = curseg->next_segno; 2829 struct f2fs_summary_block *sum_node; 2830 struct page *sum_page; 2831 2832 if (flush) 2833 write_sum_page(sbi, curseg->sum_blk, 2834 GET_SUM_BLOCK(sbi, curseg->segno)); 2835 2836 __set_test_and_inuse(sbi, new_segno); 2837 2838 mutex_lock(&dirty_i->seglist_lock); 2839 __remove_dirty_segment(sbi, new_segno, PRE); 2840 __remove_dirty_segment(sbi, new_segno, DIRTY); 2841 mutex_unlock(&dirty_i->seglist_lock); 2842 2843 reset_curseg(sbi, type, 1); 2844 curseg->alloc_type = SSR; 2845 __next_free_blkoff(sbi, curseg, 0); 2846 2847 sum_page = f2fs_get_sum_page(sbi, 
new_segno); 2848 if (IS_ERR(sum_page)) { 2849 /* GC won't be able to use stale summary pages by cp_error */ 2850 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); 2851 return; 2852 } 2853 sum_node = (struct f2fs_summary_block *)page_address(sum_page); 2854 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); 2855 f2fs_put_page(sum_page, 1); 2856} 2857 2858static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2859 int alloc_mode, unsigned long long age); 2860 2861static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, 2862 int target_type, int alloc_mode, 2863 unsigned long long age) 2864{ 2865 struct curseg_info *curseg = CURSEG_I(sbi, type); 2866 2867 curseg->seg_type = target_type; 2868 2869 if (get_ssr_segment(sbi, type, alloc_mode, age)) { 2870 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); 2871 2872 curseg->seg_type = se->type; 2873 change_curseg(sbi, type, true); 2874 } else { 2875 /* allocate cold segment by default */ 2876 curseg->seg_type = CURSEG_COLD_DATA; 2877 new_curseg(sbi, type, true); 2878 } 2879 stat_inc_seg_type(sbi, curseg); 2880} 2881 2882static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) 2883{ 2884 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); 2885 2886 if (!sbi->am.atgc_enabled) 2887 return; 2888 2889 down_read(&SM_I(sbi)->curseg_lock); 2890 2891 mutex_lock(&curseg->curseg_mutex); 2892 down_write(&SIT_I(sbi)->sentry_lock); 2893 2894 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); 2895 2896 up_write(&SIT_I(sbi)->sentry_lock); 2897 mutex_unlock(&curseg->curseg_mutex); 2898 2899 up_read(&SM_I(sbi)->curseg_lock); 2900 2901} 2902void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) 2903{ 2904 __f2fs_init_atgc_curseg(sbi); 2905} 2906 2907static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2908{ 2909 struct curseg_info *curseg = CURSEG_I(sbi, type); 2910 2911 mutex_lock(&curseg->curseg_mutex); 2912 if (!curseg->inited) 2913 goto out; 2914 2915 if (get_valid_blocks(sbi, curseg->segno, false)) { 2916 write_sum_page(sbi, curseg->sum_blk, 2917 GET_SUM_BLOCK(sbi, curseg->segno)); 2918 } else { 2919 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2920 __set_test_and_free(sbi, curseg->segno, true); 2921 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2922 } 2923out: 2924 mutex_unlock(&curseg->curseg_mutex); 2925} 2926 2927void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi) 2928{ 2929 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2930 2931 if (sbi->am.atgc_enabled) 2932 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2933} 2934 2935static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2936{ 2937 struct curseg_info *curseg = CURSEG_I(sbi, type); 2938 2939 mutex_lock(&curseg->curseg_mutex); 2940 if (!curseg->inited) 2941 goto out; 2942 if (get_valid_blocks(sbi, curseg->segno, false)) 2943 goto out; 2944 2945 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2946 __set_test_and_inuse(sbi, curseg->segno); 2947 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2948out: 2949 mutex_unlock(&curseg->curseg_mutex); 2950} 2951 2952void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi) 2953{ 2954 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2955 2956 if (sbi->am.atgc_enabled) 2957 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2958} 2959 2960static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2961 int alloc_mode, unsigned long long age) 2962{ 2963 struct curseg_info *curseg = CURSEG_I(sbi, type); 2964 const struct victim_selection *v_ops = 
DIRTY_I(sbi)->v_ops; 2965 unsigned segno = NULL_SEGNO; 2966 unsigned short seg_type = curseg->seg_type; 2967 int i, cnt; 2968 bool reversed = false; 2969 2970 sanity_check_seg_type(sbi, seg_type); 2971 2972 /* f2fs_need_SSR() already forces to do this */ 2973 if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { 2974 curseg->next_segno = segno; 2975 return 1; 2976 } 2977 2978 /* For node segments, let's do SSR more intensively */ 2979 if (IS_NODESEG(seg_type)) { 2980 if (seg_type >= CURSEG_WARM_NODE) { 2981 reversed = true; 2982 i = CURSEG_COLD_NODE; 2983 } else { 2984 i = CURSEG_HOT_NODE; 2985 } 2986 cnt = NR_CURSEG_NODE_TYPE; 2987 } else { 2988 if (seg_type >= CURSEG_WARM_DATA) { 2989 reversed = true; 2990 i = CURSEG_COLD_DATA; 2991 } else { 2992 i = CURSEG_HOT_DATA; 2993 } 2994 cnt = NR_CURSEG_DATA_TYPE; 2995 } 2996 2997 for (; cnt-- > 0; reversed ? i-- : i++) { 2998 if (i == seg_type) 2999 continue; 3000 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { 3001 curseg->next_segno = segno; 3002 return 1; 3003 } 3004 } 3005 3006 /* find valid_blocks=0 in dirty list */ 3007 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 3008 segno = get_free_segment(sbi); 3009 if (segno != NULL_SEGNO) { 3010 curseg->next_segno = segno; 3011 return 1; 3012 } 3013 } 3014 return 0; 3015} 3016 3017/* 3018 * flush out current segment and replace it with new segment 3019 * This function should be returned with success, otherwise BUG 3020 */ 3021static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 3022 int type, bool force, int contig_level) 3023{ 3024 struct curseg_info *curseg = CURSEG_I(sbi, type); 3025 3026 if (force) 3027 new_curseg(sbi, type, true); 3028 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && 3029 curseg->seg_type == CURSEG_WARM_NODE) 3030 new_curseg(sbi, type, false); 3031 else if (curseg->alloc_type == LFS && 3032 is_next_segment_free(sbi, curseg, type) && 3033 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3034 new_curseg(sbi, type, false); 3035#ifdef CONFIG_F2FS_GRADING_SSR 3036 else if (need_ssr_by_type(sbi, type, contig_level) && get_ssr_segment(sbi, type, SSR, 0)) 3037#else 3038 else if (f2fs_need_SSR(sbi) && 3039 get_ssr_segment(sbi, type, SSR, 0)) 3040#endif 3041 change_curseg(sbi, type, true); 3042 else 3043 new_curseg(sbi, type, false); 3044 3045 stat_inc_seg_type(sbi, curseg); 3046} 3047 3048void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, 3049 unsigned int start, unsigned int end) 3050{ 3051 struct curseg_info *curseg = CURSEG_I(sbi, type); 3052 unsigned int segno; 3053 3054 down_read(&SM_I(sbi)->curseg_lock); 3055 mutex_lock(&curseg->curseg_mutex); 3056 down_write(&SIT_I(sbi)->sentry_lock); 3057 3058 segno = CURSEG_I(sbi, type)->segno; 3059 if (segno < start || segno > end) 3060 goto unlock; 3061 3062 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) 3063 change_curseg(sbi, type, true); 3064 else 3065 new_curseg(sbi, type, true); 3066 3067 stat_inc_seg_type(sbi, curseg); 3068 3069 locate_dirty_segment(sbi, segno); 3070unlock: 3071 up_write(&SIT_I(sbi)->sentry_lock); 3072 3073 if (segno != curseg->segno) 3074 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", 3075 type, segno, curseg->segno); 3076 3077 mutex_unlock(&curseg->curseg_mutex); 3078 up_read(&SM_I(sbi)->curseg_lock); 3079} 3080 3081static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, 3082 bool new_sec) 3083{ 3084 struct curseg_info *curseg = CURSEG_I(sbi, type); 3085 unsigned int old_segno; 3086 
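	/*
	 * Reuse the current segment in place only if it is initialized,
	 * nothing has been written to it in this log (next_blkoff == 0),
	 * it holds no valid blocks, and no blocks in it are still valid
	 * in the last checkpoint; otherwise allocate a fresh segment.
	 */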
3087 	if (!curseg->inited)
3088 		goto alloc;
3089 
3090 	if (curseg->next_blkoff ||
3091 		get_valid_blocks(sbi, curseg->segno, new_sec))
3092 		goto alloc;
3093 
3094 	if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3095 		return;
3096 alloc:
3097 	old_segno = curseg->segno;
3098 	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE);
3099 	locate_dirty_segment(sbi, old_segno);
3100 }
3101 
3102 static void __allocate_new_section(struct f2fs_sb_info *sbi, int type)
3103 {
3104 	__allocate_new_segment(sbi, type, true);
3105 }
3106 
3107 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type)
3108 {
3109 	down_read(&SM_I(sbi)->curseg_lock);
3110 	down_write(&SIT_I(sbi)->sentry_lock);
3111 	__allocate_new_section(sbi, type);
3112 	up_write(&SIT_I(sbi)->sentry_lock);
3113 	up_read(&SM_I(sbi)->curseg_lock);
3114 }
3115 
3116 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3117 {
3118 	int i;
3119 
3120 	down_read(&SM_I(sbi)->curseg_lock);
3121 	down_write(&SIT_I(sbi)->sentry_lock);
3122 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3123 		__allocate_new_segment(sbi, i, false);
3124 	up_write(&SIT_I(sbi)->sentry_lock);
3125 	up_read(&SM_I(sbi)->curseg_lock);
3126 }
3127 
3128 static const struct segment_allocation default_salloc_ops = {
3129 	.allocate_segment = allocate_segment_by_default,
3130 };
3131 
3132 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3133 						struct cp_control *cpc)
3134 {
3135 	__u64 trim_start = cpc->trim_start;
3136 	bool has_candidate = false;
3137 
3138 	down_write(&SIT_I(sbi)->sentry_lock);
3139 	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3140 		if (add_discard_addrs(sbi, cpc, true)) {
3141 			has_candidate = true;
3142 			break;
3143 		}
3144 	}
3145 	up_write(&SIT_I(sbi)->sentry_lock);
3146 
3147 	cpc->trim_start = trim_start;
3148 	return has_candidate;
3149 }
3150 
3151 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3152 					struct discard_policy *dpolicy,
3153 					unsigned int start, unsigned int end)
3154 {
3155 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3156 	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3157 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
3158 	struct discard_cmd *dc;
3159 	struct blk_plug plug;
3160 	struct discard_sub_policy *spolicy = NULL;
3161 	int issued;
3162 	unsigned int trimmed = 0;
3163 	/* fstrim issues up to 8 discards at a time, without interruption */
3164 	select_sub_discard_policy(&spolicy, 0, dpolicy);
3165 
3166 	if (dcc->rbtree_check) {
3167 		mutex_lock(&dcc->cmd_lock);
3168 		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
3169 		mutex_unlock(&dcc->cmd_lock);
3170 	}
3171 
3172 next:
3173 	issued = 0;
3174 
3175 	mutex_lock(&dcc->cmd_lock);
3176 	if (unlikely(dcc->rbtree_check))
3177 		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
3178 							&dcc->root, false));
3179 
3180 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
3181 					NULL, start,
3182 					(struct rb_entry **)&prev_dc,
3183 					(struct rb_entry **)&next_dc,
3184 					&insert_p, &insert_parent, true, NULL);
3185 	if (!dc)
3186 		dc = next_dc;
3187 
3188 	blk_start_plug(&plug);
3189 
3190 	while (dc && dc->lstart <= end) {
3191 		struct rb_node *node;
3192 		int err = 0;
3193 
3194 		if (dc->len < dpolicy->granularity)
3195 			goto skip;
3196 
3197 		if (dc->state != D_PREP) {
3198 			list_move_tail(&dc->list, &dcc->fstrim_list);
3199 			goto skip;
3200 		}
3201 
3202 		err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued);
3203 
3204 		if (issued >= spolicy->max_requests) {
3205 			start = dc->lstart + dc->len;
3206 
3207 			if (err)
3208 				__remove_discard_cmd(sbi, dc);
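			/*
			 * Batch limit reached: release the plug and cmd_lock,
			 * wait for the discards issued so far to complete,
			 * back off briefly, then rescan from the updated
			 * start offset.
			 */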
3209 3210 blk_finish_plug(&plug); 3211 mutex_unlock(&dcc->cmd_lock); 3212 trimmed += __wait_all_discard_cmd(sbi, NULL); 3213 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); 3214 goto next; 3215 } 3216skip: 3217 node = rb_next(&dc->rb_node); 3218 if (err) 3219 __remove_discard_cmd(sbi, dc); 3220 dc = rb_entry_safe(node, struct discard_cmd, rb_node); 3221 3222 if (fatal_signal_pending(current)) 3223 break; 3224 } 3225 3226 blk_finish_plug(&plug); 3227 mutex_unlock(&dcc->cmd_lock); 3228 3229 return trimmed; 3230} 3231 3232int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 3233{ 3234 __u64 start = F2FS_BYTES_TO_BLK(range->start); 3235 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; 3236 unsigned int start_segno, end_segno; 3237 block_t start_block, end_block; 3238 struct cp_control cpc; 3239 struct discard_policy dpolicy; 3240 unsigned long long trimmed = 0; 3241 int err = 0; 3242 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); 3243 3244 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) 3245 return -EINVAL; 3246 3247 if (end < MAIN_BLKADDR(sbi)) 3248 goto out; 3249 3250 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { 3251 f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); 3252 return -EFSCORRUPTED; 3253 } 3254 3255 /* start/end segment number in main_area */ 3256 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); 3257 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 3258 GET_SEGNO(sbi, end); 3259 if (need_align) { 3260 start_segno = rounddown(start_segno, sbi->segs_per_sec); 3261 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; 3262 } 3263 3264 cpc.reason = CP_DISCARD; 3265 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); 3266 cpc.trim_start = start_segno; 3267 cpc.trim_end = end_segno; 3268 3269 if (sbi->discard_blks == 0) 3270 goto out; 3271 3272 down_write(&sbi->gc_lock); 3273 err = f2fs_write_checkpoint(sbi, &cpc); 3274 up_write(&sbi->gc_lock); 3275 if (err) 3276 goto out; 3277 3278 /* 3279 * We filed discard candidates, but actually we don't need to wait for 3280 * all of them, since they'll be issued in idle time along with runtime 3281 * discard option. User configuration looks like using runtime discard 3282 * or periodic fstrim instead of it. 3283 */ 3284 if (f2fs_realtime_discard_enable(sbi)) 3285 goto out; 3286 3287 start_block = START_BLOCK(sbi, start_segno); 3288 end_block = START_BLOCK(sbi, end_segno + 1); 3289 3290 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); 3291 trimmed = __issue_discard_cmd_range(sbi, &dpolicy, 3292 start_block, end_block); 3293 3294 trimmed += __wait_discard_cmd_range(sbi, &dpolicy, 3295 start_block, end_block); 3296out: 3297 if (!err) 3298 range->len = F2FS_BLK_TO_BYTES(trimmed); 3299 return err; 3300} 3301 3302static bool __has_curseg_space(struct f2fs_sb_info *sbi, 3303 struct curseg_info *curseg) 3304{ 3305 return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi, 3306 curseg->segno); 3307} 3308 3309int f2fs_rw_hint_to_seg_type(enum rw_hint hint) 3310{ 3311 switch (hint) { 3312 case WRITE_LIFE_SHORT: 3313 return CURSEG_HOT_DATA; 3314 case WRITE_LIFE_EXTREME: 3315 return CURSEG_COLD_DATA; 3316 default: 3317 return CURSEG_WARM_DATA; 3318 } 3319} 3320 3321/* This returns write hints for each segment type. This hints will be 3322 * passed down to block layer. There are mapping tables which depend on 3323 * the mount option 'whint_mode'. 3324 * 3325 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. 
3326 * 3327 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. 3328 * 3329 * User F2FS Block 3330 * ---- ---- ----- 3331 * META WRITE_LIFE_NOT_SET 3332 * HOT_NODE " 3333 * WARM_NODE " 3334 * COLD_NODE " 3335 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME 3336 * extension list " " 3337 * 3338 * -- buffered io 3339 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3340 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3341 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3342 * WRITE_LIFE_NONE " " 3343 * WRITE_LIFE_MEDIUM " " 3344 * WRITE_LIFE_LONG " " 3345 * 3346 * -- direct io 3347 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3348 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3349 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3350 * WRITE_LIFE_NONE " WRITE_LIFE_NONE 3351 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM 3352 * WRITE_LIFE_LONG " WRITE_LIFE_LONG 3353 * 3354 * 3) whint_mode=fs-based. F2FS passes down hints with its policy. 3355 * 3356 * User F2FS Block 3357 * ---- ---- ----- 3358 * META WRITE_LIFE_MEDIUM; 3359 * HOT_NODE WRITE_LIFE_NOT_SET 3360 * WARM_NODE " 3361 * COLD_NODE WRITE_LIFE_NONE 3362 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME 3363 * extension list " " 3364 * 3365 * -- buffered io 3366 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3367 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3368 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG 3369 * WRITE_LIFE_NONE " " 3370 * WRITE_LIFE_MEDIUM " " 3371 * WRITE_LIFE_LONG " " 3372 * 3373 * -- direct io 3374 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3375 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3376 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3377 * WRITE_LIFE_NONE " WRITE_LIFE_NONE 3378 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM 3379 * WRITE_LIFE_LONG " WRITE_LIFE_LONG 3380 */ 3381 3382enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, 3383 enum page_type type, enum temp_type temp) 3384{ 3385 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { 3386 if (type == DATA) { 3387 if (temp == WARM) 3388 return WRITE_LIFE_NOT_SET; 3389 else if (temp == HOT) 3390 return WRITE_LIFE_SHORT; 3391 else if (temp == COLD) 3392 return WRITE_LIFE_EXTREME; 3393 } else { 3394 return WRITE_LIFE_NOT_SET; 3395 } 3396 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { 3397 if (type == DATA) { 3398 if (temp == WARM) 3399 return WRITE_LIFE_LONG; 3400 else if (temp == HOT) 3401 return WRITE_LIFE_SHORT; 3402 else if (temp == COLD) 3403 return WRITE_LIFE_EXTREME; 3404 } else if (type == NODE) { 3405 if (temp == WARM || temp == HOT) 3406 return WRITE_LIFE_NOT_SET; 3407 else if (temp == COLD) 3408 return WRITE_LIFE_NONE; 3409 } else if (type == META) { 3410 return WRITE_LIFE_MEDIUM; 3411 } 3412 } 3413 return WRITE_LIFE_NOT_SET; 3414} 3415 3416static int __get_segment_type_2(struct f2fs_io_info *fio) 3417{ 3418 if (fio->type == DATA) 3419 return CURSEG_HOT_DATA; 3420 else 3421 return CURSEG_HOT_NODE; 3422} 3423 3424static int __get_segment_type_4(struct f2fs_io_info *fio) 3425{ 3426 if (fio->type == DATA) { 3427 struct inode *inode = fio->page->mapping->host; 3428 3429 if (S_ISDIR(inode->i_mode)) 3430 return CURSEG_HOT_DATA; 3431 else 3432 return CURSEG_COLD_DATA; 3433 } else { 3434 if (IS_DNODE(fio->page) && is_cold_node(fio->page)) 3435 return CURSEG_WARM_NODE; 3436 else 3437 return CURSEG_COLD_NODE; 3438 } 3439} 3440 3441static int __get_segment_type_6(struct f2fs_io_info *fio) 3442{ 3443 if (fio->type == DATA) { 3444 struct inode *inode = fio->page->mapping->host; 3445 3446 if (is_cold_data(fio->page)) { 
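			/*
			 * Cold data goes to the ATGC log when age-threshold
			 * GC is enabled; otherwise it falls back to the
			 * regular cold data log.
			 */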
3447 if (fio->sbi->am.atgc_enabled) 3448 return CURSEG_ALL_DATA_ATGC; 3449 else 3450 return CURSEG_COLD_DATA; 3451 } 3452 if (file_is_cold(inode) || f2fs_compressed_file(inode)) 3453 return CURSEG_COLD_DATA; 3454 if (file_is_hot(inode) || 3455 is_inode_flag_set(inode, FI_HOT_DATA) || 3456 f2fs_is_atomic_file(inode) || 3457 f2fs_is_volatile_file(inode)) 3458 return CURSEG_HOT_DATA; 3459 return f2fs_rw_hint_to_seg_type(inode->i_write_hint); 3460 } else { 3461 if (IS_DNODE(fio->page)) 3462 return is_cold_node(fio->page) ? CURSEG_WARM_NODE : 3463 CURSEG_HOT_NODE; 3464 return CURSEG_COLD_NODE; 3465 } 3466} 3467 3468static int __get_segment_type(struct f2fs_io_info *fio) 3469{ 3470 int type = 0; 3471 3472 switch (F2FS_OPTION(fio->sbi).active_logs) { 3473 case 2: 3474 type = __get_segment_type_2(fio); 3475 break; 3476 case 4: 3477 type = __get_segment_type_4(fio); 3478 break; 3479 case 6: 3480 type = __get_segment_type_6(fio); 3481 break; 3482 default: 3483 f2fs_bug_on(fio->sbi, true); 3484 } 3485 3486 if (IS_HOT(type)) 3487 fio->temp = HOT; 3488 else if (IS_WARM(type)) 3489 fio->temp = WARM; 3490 else 3491 fio->temp = COLD; 3492 return type; 3493} 3494 3495void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, 3496 block_t old_blkaddr, block_t *new_blkaddr, 3497 struct f2fs_summary *sum, int type, 3498 struct f2fs_io_info *fio, int contig_level) 3499{ 3500 struct sit_info *sit_i = SIT_I(sbi); 3501 struct curseg_info *curseg = CURSEG_I(sbi, type); 3502 unsigned long long old_mtime; 3503 bool from_gc = (type == CURSEG_ALL_DATA_ATGC); 3504 struct seg_entry *se = NULL; 3505#ifdef CONFIG_F2FS_GRADING_SSR 3506 struct inode *inode = NULL; 3507#endif 3508 int contig = SEQ_NONE; 3509 3510 down_read(&SM_I(sbi)->curseg_lock); 3511 3512 mutex_lock(&curseg->curseg_mutex); 3513 down_write(&sit_i->sentry_lock); 3514 3515 if (from_gc) { 3516 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); 3517 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); 3518 sanity_check_seg_type(sbi, se->type); 3519 f2fs_bug_on(sbi, IS_NODESEG(se->type)); 3520 } 3521 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 3522 3523 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); 3524 3525 f2fs_wait_discard_bio(sbi, *new_blkaddr); 3526 3527 /* 3528 * __add_sum_entry should be resided under the curseg_mutex 3529 * because, this function updates a summary entry in the 3530 * current summary block. 3531 */ 3532 __add_sum_entry(sbi, type, sum); 3533 3534 __refresh_next_blkoff(sbi, curseg); 3535 3536 stat_inc_block_count(sbi, curseg); 3537 3538 if (from_gc) { 3539 old_mtime = get_segment_mtime(sbi, old_blkaddr); 3540 } else { 3541 update_segment_mtime(sbi, old_blkaddr, 0); 3542 old_mtime = 0; 3543 } 3544 update_segment_mtime(sbi, *new_blkaddr, old_mtime); 3545 3546 /* 3547 * SIT information should be updated before segment allocation, 3548 * since SSR needs latest valid block information. 
3549 */ 3550 update_sit_entry(sbi, *new_blkaddr, 1); 3551 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 3552 update_sit_entry(sbi, old_blkaddr, -1); 3553 3554 if (!__has_curseg_space(sbi, curseg)) { 3555 if (from_gc) { 3556 get_atssr_segment(sbi, type, se->type, 3557 AT_SSR, se->mtime); 3558 } else { 3559#ifdef CONFIG_F2FS_GRADING_SSR 3560 if (contig_level != SEQ_NONE) { 3561 contig = contig_level; 3562 goto allocate_label; 3563 } 3564 3565 if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) && 3566 page->mapping != META_MAPPING(sbi)) { 3567 inode = page->mapping->host; 3568 contig = check_io_seq(get_dirty_pages(inode)); 3569 } 3570allocate_label: 3571#endif 3572 sit_i->s_ops->allocate_segment(sbi, type, false, contig); 3573 } 3574 } 3575 /* 3576 * segment dirty status should be updated after segment allocation, 3577 * so we just need to update status only one time after previous 3578 * segment being closed. 3579 */ 3580 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 3581 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); 3582 3583 up_write(&sit_i->sentry_lock); 3584 3585 if (page && IS_NODESEG(type)) { 3586 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 3587 3588 f2fs_inode_chksum_set(sbi, page); 3589 } 3590 3591 if (fio) { 3592 struct f2fs_bio_info *io; 3593 3594 if (F2FS_IO_ALIGNED(sbi)) 3595 fio->retry = false; 3596 3597 INIT_LIST_HEAD(&fio->list); 3598 fio->in_list = true; 3599 io = sbi->write_io[fio->type] + fio->temp; 3600 spin_lock(&io->io_lock); 3601 list_add_tail(&fio->list, &io->io_list); 3602 spin_unlock(&io->io_lock); 3603 } 3604 3605 mutex_unlock(&curseg->curseg_mutex); 3606 3607 up_read(&SM_I(sbi)->curseg_lock); 3608} 3609 3610static void update_device_state(struct f2fs_io_info *fio) 3611{ 3612 struct f2fs_sb_info *sbi = fio->sbi; 3613 unsigned int devidx; 3614 3615 if (!f2fs_is_multi_device(sbi)) 3616 return; 3617 3618 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); 3619 3620 /* update device state for fsync */ 3621 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); 3622 3623 /* update device state for checkpoint */ 3624 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { 3625 spin_lock(&sbi->dev_lock); 3626 f2fs_set_bit(devidx, (char *)&sbi->dirty_device); 3627 spin_unlock(&sbi->dev_lock); 3628 } 3629} 3630 3631static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 3632{ 3633 int type = __get_segment_type(fio); 3634 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); 3635 3636 if (keep_order) 3637 down_read(&fio->sbi->io_order_lock); 3638reallocate: 3639 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 3640 &fio->new_blkaddr, sum, type, fio, SEQ_NONE); 3641 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) 3642 invalidate_mapping_pages(META_MAPPING(fio->sbi), 3643 fio->old_blkaddr, fio->old_blkaddr); 3644 3645 /* writeout dirty page into bdev */ 3646 f2fs_submit_page_write(fio); 3647 if (fio->retry) { 3648 fio->old_blkaddr = fio->new_blkaddr; 3649 goto reallocate; 3650 } 3651 3652 update_device_state(fio); 3653 3654 if (keep_order) 3655 up_read(&fio->sbi->io_order_lock); 3656} 3657 3658void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 3659 enum iostat_type io_type) 3660{ 3661 struct f2fs_io_info fio = { 3662 .sbi = sbi, 3663 .type = META, 3664 .temp = HOT, 3665 .op = REQ_OP_WRITE, 3666 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, 3667 .old_blkaddr = page->index, 3668 .new_blkaddr = page->index, 3669 .page = page, 3670 
.encrypted_page = NULL, 3671 .in_list = false, 3672 }; 3673 3674 if (unlikely(page->index >= MAIN_BLKADDR(sbi))) 3675 fio.op_flags &= ~REQ_META; 3676 3677 set_page_writeback(page); 3678 ClearPageError(page); 3679 f2fs_submit_page_write(&fio); 3680 3681 stat_inc_meta_count(sbi, page->index); 3682 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); 3683} 3684 3685void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) 3686{ 3687 struct f2fs_summary sum; 3688 3689 set_summary(&sum, nid, 0, 0); 3690 do_write_page(&sum, fio); 3691 3692 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 3693} 3694 3695void f2fs_outplace_write_data(struct dnode_of_data *dn, 3696 struct f2fs_io_info *fio) 3697{ 3698 struct f2fs_sb_info *sbi = fio->sbi; 3699 struct f2fs_summary sum; 3700 3701 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 3702 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); 3703 do_write_page(&sum, fio); 3704 f2fs_update_data_blkaddr(dn, fio->new_blkaddr); 3705 3706 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); 3707} 3708 3709int f2fs_inplace_write_data(struct f2fs_io_info *fio) 3710{ 3711 int err; 3712 struct f2fs_sb_info *sbi = fio->sbi; 3713 unsigned int segno; 3714 3715 fio->new_blkaddr = fio->old_blkaddr; 3716 /* i/o temperature is needed for passing down write hints */ 3717 __get_segment_type(fio); 3718 3719 segno = GET_SEGNO(sbi, fio->new_blkaddr); 3720 3721 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { 3722 set_sbi_flag(sbi, SBI_NEED_FSCK); 3723 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", 3724 __func__, segno); 3725 return -EFSCORRUPTED; 3726 } 3727 3728 stat_inc_inplace_blocks(fio->sbi); 3729 3730 if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) 3731 err = f2fs_merge_page_bio(fio); 3732 else 3733 err = f2fs_submit_page_bio(fio); 3734 if (!err) { 3735 update_device_state(fio); 3736 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 3737 } 3738 3739 return err; 3740} 3741 3742static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, 3743 unsigned int segno) 3744{ 3745 int i; 3746 3747 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { 3748 if (CURSEG_I(sbi, i)->segno == segno) 3749 break; 3750 } 3751 return i; 3752} 3753 3754void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 3755 block_t old_blkaddr, block_t new_blkaddr, 3756 bool recover_curseg, bool recover_newaddr, 3757 bool from_gc) 3758{ 3759 struct sit_info *sit_i = SIT_I(sbi); 3760 struct curseg_info *curseg; 3761 unsigned int segno, old_cursegno; 3762 struct seg_entry *se; 3763 int type; 3764 unsigned short old_blkoff; 3765 3766 segno = GET_SEGNO(sbi, new_blkaddr); 3767 se = get_seg_entry(sbi, segno); 3768 type = se->type; 3769 3770 down_write(&SM_I(sbi)->curseg_lock); 3771 3772 if (!recover_curseg) { 3773 /* for recovery flow */ 3774 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { 3775 if (old_blkaddr == NULL_ADDR) 3776 type = CURSEG_COLD_DATA; 3777 else 3778 type = CURSEG_WARM_DATA; 3779 } 3780 } else { 3781 if (IS_CURSEG(sbi, segno)) { 3782 /* se->type is volatile as SSR allocation */ 3783 type = __f2fs_get_curseg(sbi, segno); 3784 f2fs_bug_on(sbi, type == NO_CHECK_TYPE); 3785 } else { 3786 type = CURSEG_WARM_DATA; 3787 } 3788 } 3789 3790 f2fs_bug_on(sbi, !IS_DATASEG(type)); 3791 curseg = CURSEG_I(sbi, type); 3792 3793 mutex_lock(&curseg->curseg_mutex); 3794 down_write(&sit_i->sentry_lock); 3795 3796 old_cursegno = curseg->segno; 3797 old_blkoff = curseg->next_blkoff; 3798 3799 /* change the 
current segment */
3800 	if (segno != curseg->segno) {
3801 		curseg->next_segno = segno;
3802 		change_curseg(sbi, type, true);
3803 	}
3804 
3805 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3806 	__add_sum_entry(sbi, type, sum);
3807 
3808 	if (!recover_curseg || recover_newaddr) {
3809 		if (!from_gc)
3810 			update_segment_mtime(sbi, new_blkaddr, 0);
3811 		update_sit_entry(sbi, new_blkaddr, 1);
3812 	}
3813 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3814 		invalidate_mapping_pages(META_MAPPING(sbi),
3815 					old_blkaddr, old_blkaddr);
3816 		if (!from_gc)
3817 			update_segment_mtime(sbi, old_blkaddr, 0);
3818 		update_sit_entry(sbi, old_blkaddr, -1);
3819 	}
3820 
3821 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3822 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3823 
3824 	locate_dirty_segment(sbi, old_cursegno);
3825 
3826 	if (recover_curseg) {
3827 		if (old_cursegno != curseg->segno) {
3828 			curseg->next_segno = old_cursegno;
3829 			change_curseg(sbi, type, true);
3830 		}
3831 		curseg->next_blkoff = old_blkoff;
3832 	}
3833 
3834 	up_write(&sit_i->sentry_lock);
3835 	mutex_unlock(&curseg->curseg_mutex);
3836 	up_write(&SM_I(sbi)->curseg_lock);
3837 }
3838 
3839 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3840 				block_t old_addr, block_t new_addr,
3841 				unsigned char version, bool recover_curseg,
3842 				bool recover_newaddr)
3843 {
3844 	struct f2fs_summary sum;
3845 
3846 	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3847 
3848 	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3849 					recover_curseg, recover_newaddr, false);
3850 
3851 	f2fs_update_data_blkaddr(dn, new_addr);
3852 }
3853 
3854 void f2fs_wait_on_page_writeback(struct page *page,
3855 				enum page_type type, bool ordered, bool locked)
3856 {
3857 	if (PageWriteback(page)) {
3858 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3859 
3860 		/* submit cached LFS IO */
3861 		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3862 		/* submit cached IPU IO */
3863 		f2fs_submit_merged_ipu_write(sbi, NULL, page);
3864 		if (ordered) {
3865 			wait_on_page_writeback(page);
3866 			f2fs_bug_on(sbi, locked && PageWriteback(page));
3867 		} else {
3868 			wait_for_stable_page(page);
3869 		}
3870 	}
3871 }
3872 
3873 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3874 {
3875 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3876 	struct page *cpage;
3877 
3878 	if (!f2fs_post_read_required(inode))
3879 		return;
3880 
3881 	if (!__is_valid_data_blkaddr(blkaddr))
3882 		return;
3883 
3884 	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3885 	if (cpage) {
3886 		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3887 		f2fs_put_page(cpage, 1);
3888 	}
3889 }
3890 
3891 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3892 								block_t len)
3893 {
3894 	block_t i;
3895 
3896 	for (i = 0; i < len; i++)
3897 		f2fs_wait_on_block_writeback(inode, blkaddr + i);
3898 }
3899 
3900 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3901 {
3902 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3903 	struct curseg_info *seg_i;
3904 	unsigned char *kaddr;
3905 	struct page *page;
3906 	block_t start;
3907 	int i, j, offset;
3908 
3909 	start = start_sum_block(sbi);
3910 
3911 	page = f2fs_get_meta_page(sbi, start++);
3912 	if (IS_ERR(page))
3913 		return PTR_ERR(page);
3914 	kaddr = (unsigned char *)page_address(page);
3915 
3916 	/* Step 1: restore nat cache */
3917 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3918 	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3919 
3920 	/* Step 2: restore sit cache */
3921 	seg_i = CURSEG_I(sbi,
CURSEG_COLD_DATA); 3922 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); 3923 offset = 2 * SUM_JOURNAL_SIZE; 3924 3925 /* Step 3: restore summary entries */ 3926 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 3927 unsigned short blk_off; 3928 unsigned int segno; 3929 3930 seg_i = CURSEG_I(sbi, i); 3931 segno = le32_to_cpu(ckpt->cur_data_segno[i]); 3932 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); 3933 seg_i->next_segno = segno; 3934 reset_curseg(sbi, i, 0); 3935 seg_i->alloc_type = ckpt->alloc_type[i]; 3936 seg_i->next_blkoff = blk_off; 3937 3938 if (seg_i->alloc_type == SSR) 3939 blk_off = sbi->blocks_per_seg; 3940 3941 for (j = 0; j < blk_off; j++) { 3942 struct f2fs_summary *s; 3943 s = (struct f2fs_summary *)(kaddr + offset); 3944 seg_i->sum_blk->entries[j] = *s; 3945 offset += SUMMARY_SIZE; 3946 if (offset + SUMMARY_SIZE <= PAGE_SIZE - 3947 SUM_FOOTER_SIZE) 3948 continue; 3949 3950 f2fs_put_page(page, 1); 3951 page = NULL; 3952 3953 page = f2fs_get_meta_page(sbi, start++); 3954 if (IS_ERR(page)) 3955 return PTR_ERR(page); 3956 kaddr = (unsigned char *)page_address(page); 3957 offset = 0; 3958 } 3959 } 3960 f2fs_put_page(page, 1); 3961 return 0; 3962} 3963 3964static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) 3965{ 3966 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3967 struct f2fs_summary_block *sum; 3968 struct curseg_info *curseg; 3969 struct page *new; 3970 unsigned short blk_off; 3971 unsigned int segno = 0; 3972 block_t blk_addr = 0; 3973 int err = 0; 3974 3975 /* get segment number and block addr */ 3976 if (IS_DATASEG(type)) { 3977 segno = le32_to_cpu(ckpt->cur_data_segno[type]); 3978 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - 3979 CURSEG_HOT_DATA]); 3980 if (__exist_node_summaries(sbi)) 3981 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type); 3982 else 3983 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); 3984 } else { 3985 segno = le32_to_cpu(ckpt->cur_node_segno[type - 3986 CURSEG_HOT_NODE]); 3987 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - 3988 CURSEG_HOT_NODE]); 3989 if (__exist_node_summaries(sbi)) 3990 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, 3991 type - CURSEG_HOT_NODE); 3992 else 3993 blk_addr = GET_SUM_BLOCK(sbi, segno); 3994 } 3995 3996 new = f2fs_get_meta_page(sbi, blk_addr); 3997 if (IS_ERR(new)) 3998 return PTR_ERR(new); 3999 sum = (struct f2fs_summary_block *)page_address(new); 4000 4001 if (IS_NODESEG(type)) { 4002 if (__exist_node_summaries(sbi)) { 4003 struct f2fs_summary *ns = &sum->entries[0]; 4004 int i; 4005 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { 4006 ns->version = 0; 4007 ns->ofs_in_node = 0; 4008 } 4009 } else { 4010 err = f2fs_restore_node_summary(sbi, segno, sum); 4011 if (err) 4012 goto out; 4013 } 4014 } 4015 4016 /* set uncompleted segment to curseg */ 4017 curseg = CURSEG_I(sbi, type); 4018 mutex_lock(&curseg->curseg_mutex); 4019 4020 /* update journal info */ 4021 down_write(&curseg->journal_rwsem); 4022 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); 4023 up_write(&curseg->journal_rwsem); 4024 4025 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); 4026 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); 4027 curseg->next_segno = segno; 4028 reset_curseg(sbi, type, 0); 4029 curseg->alloc_type = ckpt->alloc_type[type]; 4030 curseg->next_blkoff = blk_off; 4031 mutex_unlock(&curseg->curseg_mutex); 4032out: 4033 f2fs_put_page(new, 1); 4034 return err; 4035} 4036 4037static int restore_curseg_summaries(struct 
f2fs_sb_info *sbi) 4038{ 4039 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; 4040 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; 4041 int type = CURSEG_HOT_DATA; 4042 int err; 4043 4044 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { 4045 int npages = f2fs_npages_for_summary_flush(sbi, true); 4046 4047 if (npages >= 2) 4048 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, 4049 META_CP, true); 4050 4051 /* restore for compacted data summary */ 4052 err = read_compacted_summaries(sbi); 4053 if (err) 4054 return err; 4055 type = CURSEG_HOT_NODE; 4056 } 4057 4058 if (__exist_node_summaries(sbi)) 4059 f2fs_ra_meta_pages(sbi, 4060 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type), 4061 NR_CURSEG_PERSIST_TYPE - type, META_CP, true); 4062 4063 for (; type <= CURSEG_COLD_NODE; type++) { 4064 err = read_normal_summaries(sbi, type); 4065 if (err) 4066 return err; 4067 } 4068 4069 /* sanity check for summary blocks */ 4070 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || 4071 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { 4072 f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", 4073 nats_in_cursum(nat_j), sits_in_cursum(sit_j)); 4074 return -EINVAL; 4075 } 4076 4077 return 0; 4078} 4079 4080static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) 4081{ 4082 struct page *page; 4083 unsigned char *kaddr; 4084 struct f2fs_summary *summary; 4085 struct curseg_info *seg_i; 4086 int written_size = 0; 4087 int i, j; 4088 4089 page = f2fs_grab_meta_page(sbi, blkaddr++); 4090 kaddr = (unsigned char *)page_address(page); 4091 memset(kaddr, 0, PAGE_SIZE); 4092 4093 /* Step 1: write nat cache */ 4094 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 4095 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); 4096 written_size += SUM_JOURNAL_SIZE; 4097 4098 /* Step 2: write sit cache */ 4099 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 4100 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); 4101 written_size += SUM_JOURNAL_SIZE; 4102 4103 /* Step 3: write summary entries */ 4104 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 4105 unsigned short blkoff; 4106 seg_i = CURSEG_I(sbi, i); 4107 if (sbi->ckpt->alloc_type[i] == SSR) 4108 blkoff = sbi->blocks_per_seg; 4109 else 4110 blkoff = curseg_blkoff(sbi, i); 4111 4112 for (j = 0; j < blkoff; j++) { 4113 if (!page) { 4114 page = f2fs_grab_meta_page(sbi, blkaddr++); 4115 kaddr = (unsigned char *)page_address(page); 4116 memset(kaddr, 0, PAGE_SIZE); 4117 written_size = 0; 4118 } 4119 summary = (struct f2fs_summary *)(kaddr + written_size); 4120 *summary = seg_i->sum_blk->entries[j]; 4121 written_size += SUMMARY_SIZE; 4122 4123 if (written_size + SUMMARY_SIZE <= PAGE_SIZE - 4124 SUM_FOOTER_SIZE) 4125 continue; 4126 4127 set_page_dirty(page); 4128 f2fs_put_page(page, 1); 4129 page = NULL; 4130 } 4131 } 4132 if (page) { 4133 set_page_dirty(page); 4134 f2fs_put_page(page, 1); 4135 } 4136} 4137 4138static void write_normal_summaries(struct f2fs_sb_info *sbi, 4139 block_t blkaddr, int type) 4140{ 4141 int i, end; 4142 if (IS_DATASEG(type)) 4143 end = type + NR_CURSEG_DATA_TYPE; 4144 else 4145 end = type + NR_CURSEG_NODE_TYPE; 4146 4147 for (i = type; i < end; i++) 4148 write_current_sum_page(sbi, i, blkaddr + (i - type)); 4149} 4150 4151void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 4152{ 4153 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) 4154 write_compacted_summaries(sbi, start_blk); 4155 else 4156 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); 
4157} 4158 4159void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 4160{ 4161 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); 4162} 4163 4164int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, 4165 unsigned int val, int alloc) 4166{ 4167 int i; 4168 4169 if (type == NAT_JOURNAL) { 4170 for (i = 0; i < nats_in_cursum(journal); i++) { 4171 if (le32_to_cpu(nid_in_journal(journal, i)) == val) 4172 return i; 4173 } 4174 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) 4175 return update_nats_in_cursum(journal, 1); 4176 } else if (type == SIT_JOURNAL) { 4177 for (i = 0; i < sits_in_cursum(journal); i++) 4178 if (le32_to_cpu(segno_in_journal(journal, i)) == val) 4179 return i; 4180 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) 4181 return update_sits_in_cursum(journal, 1); 4182 } 4183 return -1; 4184} 4185 4186static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, 4187 unsigned int segno) 4188{ 4189 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); 4190} 4191 4192static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, 4193 unsigned int start) 4194{ 4195 struct sit_info *sit_i = SIT_I(sbi); 4196 struct page *page; 4197 pgoff_t src_off, dst_off; 4198 4199 src_off = current_sit_addr(sbi, start); 4200 dst_off = next_sit_addr(sbi, src_off); 4201 4202 page = f2fs_grab_meta_page(sbi, dst_off); 4203 seg_info_to_sit_page(sbi, page, start); 4204 4205 set_page_dirty(page); 4206 set_to_next_sit(sit_i, start); 4207 4208 return page; 4209} 4210 4211static struct sit_entry_set *grab_sit_entry_set(void) 4212{ 4213 struct sit_entry_set *ses = 4214 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); 4215 4216 ses->entry_cnt = 0; 4217 INIT_LIST_HEAD(&ses->set_list); 4218 return ses; 4219} 4220 4221static void release_sit_entry_set(struct sit_entry_set *ses) 4222{ 4223 list_del(&ses->set_list); 4224 kmem_cache_free(sit_entry_set_slab, ses); 4225} 4226 4227static void adjust_sit_entry_set(struct sit_entry_set *ses, 4228 struct list_head *head) 4229{ 4230 struct sit_entry_set *next = ses; 4231 4232 if (list_is_last(&ses->set_list, head)) 4233 return; 4234 4235 list_for_each_entry_continue(next, head, set_list) 4236 if (ses->entry_cnt <= next->entry_cnt) 4237 break; 4238 4239 list_move_tail(&ses->set_list, &next->set_list); 4240} 4241 4242static void add_sit_entry(unsigned int segno, struct list_head *head) 4243{ 4244 struct sit_entry_set *ses; 4245 unsigned int start_segno = START_SEGNO(segno); 4246 4247 list_for_each_entry(ses, head, set_list) { 4248 if (ses->start_segno == start_segno) { 4249 ses->entry_cnt++; 4250 adjust_sit_entry_set(ses, head); 4251 return; 4252 } 4253 } 4254 4255 ses = grab_sit_entry_set(); 4256 4257 ses->start_segno = start_segno; 4258 ses->entry_cnt++; 4259 list_add(&ses->set_list, head); 4260} 4261 4262static void add_sits_in_set(struct f2fs_sb_info *sbi) 4263{ 4264 struct f2fs_sm_info *sm_info = SM_I(sbi); 4265 struct list_head *set_list = &sm_info->sit_entry_set; 4266 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; 4267 unsigned int segno; 4268 4269 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) 4270 add_sit_entry(segno, set_list); 4271} 4272 4273static void remove_sits_in_journal(struct f2fs_sb_info *sbi) 4274{ 4275 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4276 struct f2fs_journal *journal = curseg->journal; 4277 int i; 4278 4279 down_write(&curseg->journal_rwsem); 4280 for (i = 0; i < sits_in_cursum(journal); i++) { 4281 unsigned int segno; 4282 
bool dirtied;
4283
4284 segno = le32_to_cpu(segno_in_journal(journal, i));
4285 dirtied = __mark_sit_entry_dirty(sbi, segno);
4286
4287 if (!dirtied)
4288 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4289 }
4290 update_sits_in_cursum(journal, -i);
4291 up_write(&curseg->journal_rwsem);
4292}
4293
4294/*
4295 * CP calls this function, which flushes SIT entries including sit_journal,
4296 * and moves prefree segs to free segs.
4297 */
4298void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4299{
4300 struct sit_info *sit_i = SIT_I(sbi);
4301 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4302 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4303 struct f2fs_journal *journal = curseg->journal;
4304 struct sit_entry_set *ses, *tmp;
4305 struct list_head *head = &SM_I(sbi)->sit_entry_set;
4306 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4307 struct seg_entry *se;
4308
4309 down_write(&sit_i->sentry_lock);
4310
4311 if (!sit_i->dirty_sentries)
4312 goto out;
4313
4314 /*
4315 * add and account sit entries of dirty bitmap in sit entry
4316 * set temporarily
4317 */
4318 add_sits_in_set(sbi);
4319
4320 /*
4321 * if there is not enough space in the journal to store dirty sit
4322 * entries, remove all entries from the journal and add and account
4323 * them in the sit entry set.
4324 */
4325 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4326 !to_journal)
4327 remove_sits_in_journal(sbi);
4328
4329 /*
4330 * there are two steps to flush sit entries:
4331 * #1, flush sit entries to journal in current cold data summary block.
4332 * #2, flush sit entries to sit page.
4333 */
4334 list_for_each_entry_safe(ses, tmp, head, set_list) {
4335 struct page *page = NULL;
4336 struct f2fs_sit_block *raw_sit = NULL;
4337 unsigned int start_segno = ses->start_segno;
4338 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4339 (unsigned long)MAIN_SEGS(sbi));
4340 unsigned int segno = start_segno;
4341
4342 if (to_journal &&
4343 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4344 to_journal = false;
4345
4346 if (to_journal) {
4347 down_write(&curseg->journal_rwsem);
4348 } else {
4349 page = get_next_sit_page(sbi, start_segno);
4350 raw_sit = page_address(page);
4351 }
4352
4353 /* flush dirty sit entries in region of current sit set */
4354 for_each_set_bit_from(segno, bitmap, end) {
4355 int offset, sit_offset;
4356
4357 se = get_seg_entry(sbi, segno);
4358#ifdef CONFIG_F2FS_CHECK_FS
4359 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4360 SIT_VBLOCK_MAP_SIZE))
4361 f2fs_bug_on(sbi, 1);
4362#endif
4363
4364 /* add discard candidates */
4365 if (!(cpc->reason & CP_DISCARD)) {
4366 cpc->trim_start = segno;
4367 add_discard_addrs(sbi, cpc, false);
4368 }
4369
4370 if (to_journal) {
4371 offset = f2fs_lookup_journal_in_cursum(journal,
4372 SIT_JOURNAL, segno, 1);
4373 f2fs_bug_on(sbi, offset < 0);
4374 segno_in_journal(journal, offset) =
4375 cpu_to_le32(segno);
4376 seg_info_to_raw_sit(se,
4377 &sit_in_journal(journal, offset));
4378 check_block_count(sbi, segno,
4379 &sit_in_journal(journal, offset));
4380 } else {
4381 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4382 seg_info_to_raw_sit(se,
4383 &raw_sit->entries[sit_offset]);
4384 check_block_count(sbi, segno,
4385 &raw_sit->entries[sit_offset]);
4386 }
4387
4388 __clear_bit(segno, bitmap);
4389 sit_i->dirty_sentries--;
4390 ses->entry_cnt--;
4391 }
4392
4393 if (to_journal)
4394 up_write(&curseg->journal_rwsem);
4395 else
4396 f2fs_put_page(page, 1);
4397
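/* every dirty entry accounted in this set should have been flushed above */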
4398 f2fs_bug_on(sbi, ses->entry_cnt);
4399 release_sit_entry_set(ses);
4400 }
4401
4402 f2fs_bug_on(sbi, !list_empty(head));
4403 f2fs_bug_on(sbi, sit_i->dirty_sentries);
4404out:
4405 if (cpc->reason & CP_DISCARD) {
4406 __u64 trim_start = cpc->trim_start;
4407
4408 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4409 add_discard_addrs(sbi, cpc, false);
4410
4411 cpc->trim_start = trim_start;
4412 }
4413 up_write(&sit_i->sentry_lock);
4414
4415 set_prefree_as_free_segments(sbi);
4416}
4417
4418static int build_sit_info(struct f2fs_sb_info *sbi)
4419{
4420 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4421 struct sit_info *sit_i;
4422 unsigned int sit_segs, start;
4423 char *src_bitmap, *bitmap;
4424 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4425
4426 /* allocate memory for SIT information */
4427 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4428 if (!sit_i)
4429 return -ENOMEM;
4430
4431 SM_I(sbi)->sit_info = sit_i;
4432
4433 sit_i->sentries =
4434 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4435 MAIN_SEGS(sbi)),
4436 GFP_KERNEL);
4437 if (!sit_i->sentries)
4438 return -ENOMEM;
4439
4440 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4441 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4442 GFP_KERNEL);
4443 if (!sit_i->dirty_sentries_bitmap)
4444 return -ENOMEM;
4445
4446#ifdef CONFIG_F2FS_CHECK_FS
4447 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
4448#else
4449 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
4450#endif
4451 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4452 if (!sit_i->bitmap)
4453 return -ENOMEM;
4454
4455 bitmap = sit_i->bitmap;
4456
4457 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4458 sit_i->sentries[start].cur_valid_map = bitmap;
4459 bitmap += SIT_VBLOCK_MAP_SIZE;
4460
4461 sit_i->sentries[start].ckpt_valid_map = bitmap;
4462 bitmap += SIT_VBLOCK_MAP_SIZE;
4463
4464#ifdef CONFIG_F2FS_CHECK_FS
4465 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4466 bitmap += SIT_VBLOCK_MAP_SIZE;
4467#endif
4468
4469 sit_i->sentries[start].discard_map = bitmap;
4470 bitmap += SIT_VBLOCK_MAP_SIZE;
4471 }
4472
4473 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4474 if (!sit_i->tmp_map)
4475 return -ENOMEM;
4476
4477 if (__is_large_section(sbi)) {
4478 sit_i->sec_entries =
4479 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4480 MAIN_SECS(sbi)),
4481 GFP_KERNEL);
4482 if (!sit_i->sec_entries)
4483 return -ENOMEM;
4484 }
4485
4486 /* get information related to SIT */
4487 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4488
4489 /* set up SIT bitmap from checkpoint pack */
4490 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4491 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4492
4493 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4494 if (!sit_i->sit_bitmap)
4495 return -ENOMEM;
4496
4497#ifdef CONFIG_F2FS_CHECK_FS
4498 sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4499 sit_bitmap_size, GFP_KERNEL);
4500 if (!sit_i->sit_bitmap_mir)
4501 return -ENOMEM;
4502
4503 sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4504 main_bitmap_size, GFP_KERNEL);
4505 if (!sit_i->invalid_segmap)
4506 return -ENOMEM;
4507#endif
4508
4509 /* init SIT information */
4510 sit_i->s_ops = &default_salloc_ops;
4511
4512 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4513 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4514 sit_i->written_valid_blocks = 0;
4515 sit_i->bitmap_size =
sit_bitmap_size; 4516 sit_i->dirty_sentries = 0; 4517 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; 4518 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); 4519 sit_i->mounted_time = ktime_get_boottime_seconds(); 4520 init_rwsem(&sit_i->sentry_lock); 4521 return 0; 4522} 4523 4524static int build_free_segmap(struct f2fs_sb_info *sbi) 4525{ 4526 struct free_segmap_info *free_i; 4527 unsigned int bitmap_size, sec_bitmap_size; 4528 4529 /* allocate memory for free segmap information */ 4530 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); 4531 if (!free_i) 4532 return -ENOMEM; 4533 4534 SM_I(sbi)->free_info = free_i; 4535 4536 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4537 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); 4538 if (!free_i->free_segmap) 4539 return -ENOMEM; 4540 4541 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4542 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); 4543 if (!free_i->free_secmap) 4544 return -ENOMEM; 4545 4546 /* set all segments as dirty temporarily */ 4547 memset(free_i->free_segmap, 0xff, bitmap_size); 4548 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 4549 4550 /* init free segmap information */ 4551 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 4552 free_i->free_segments = 0; 4553 free_i->free_sections = 0; 4554 spin_lock_init(&free_i->segmap_lock); 4555 return 0; 4556} 4557 4558static int build_curseg(struct f2fs_sb_info *sbi) 4559{ 4560 struct curseg_info *array; 4561 int i; 4562 4563 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, 4564 sizeof(*array)), GFP_KERNEL); 4565 if (!array) 4566 return -ENOMEM; 4567 4568 SM_I(sbi)->curseg_array = array; 4569 4570 for (i = 0; i < NO_CHECK_TYPE; i++) { 4571 mutex_init(&array[i].curseg_mutex); 4572 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); 4573 if (!array[i].sum_blk) 4574 return -ENOMEM; 4575 init_rwsem(&array[i].journal_rwsem); 4576 array[i].journal = f2fs_kzalloc(sbi, 4577 sizeof(struct f2fs_journal), GFP_KERNEL); 4578 if (!array[i].journal) 4579 return -ENOMEM; 4580 if (i < NR_PERSISTENT_LOG) 4581 array[i].seg_type = CURSEG_HOT_DATA + i; 4582 else if (i == CURSEG_COLD_DATA_PINNED) 4583 array[i].seg_type = CURSEG_COLD_DATA; 4584 else if (i == CURSEG_ALL_DATA_ATGC) 4585 array[i].seg_type = CURSEG_COLD_DATA; 4586 array[i].segno = NULL_SEGNO; 4587 array[i].next_blkoff = 0; 4588 array[i].inited = false; 4589 } 4590 return restore_curseg_summaries(sbi); 4591} 4592 4593static int build_sit_entries(struct f2fs_sb_info *sbi) 4594{ 4595 struct sit_info *sit_i = SIT_I(sbi); 4596 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4597 struct f2fs_journal *journal = curseg->journal; 4598 struct seg_entry *se; 4599 struct f2fs_sit_entry sit; 4600 int sit_blk_cnt = SIT_BLK_CNT(sbi); 4601 unsigned int i, start, end; 4602 unsigned int readed, start_blk = 0; 4603 int err = 0; 4604 block_t sit_valid_blocks[2] = {0, 0}; 4605 4606 do { 4607 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, 4608 META_SIT, true); 4609 4610 start = start_blk * sit_i->sents_per_block; 4611 end = (start_blk + readed) * sit_i->sents_per_block; 4612 4613 for (; start < end && start < MAIN_SEGS(sbi); start++) { 4614 struct f2fs_sit_block *sit_blk; 4615 struct page *page; 4616 4617 se = &sit_i->sentries[start]; 4618 page = get_current_sit_page(sbi, start); 4619 if (IS_ERR(page)) 4620 return PTR_ERR(page); 4621 sit_blk = (struct f2fs_sit_block *)page_address(page); 4622 sit = 
sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 4623 f2fs_put_page(page, 1); 4624 4625 err = check_block_count(sbi, start, &sit); 4626 if (err) 4627 return err; 4628 seg_info_from_raw_sit(se, &sit); 4629 4630 if (se->type >= NR_PERSISTENT_LOG) { 4631 f2fs_err(sbi, "Invalid segment type: %u, segno: %u", 4632 se->type, start); 4633 return -EFSCORRUPTED; 4634 } 4635 4636 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; 4637 4638 /* build discard map only one time */ 4639 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4640 memset(se->discard_map, 0xff, 4641 SIT_VBLOCK_MAP_SIZE); 4642 } else { 4643 memcpy(se->discard_map, 4644 se->cur_valid_map, 4645 SIT_VBLOCK_MAP_SIZE); 4646 sbi->discard_blks += 4647 sbi->blocks_per_seg - 4648 se->valid_blocks; 4649 } 4650 4651 if (__is_large_section(sbi)) 4652 get_sec_entry(sbi, start)->valid_blocks += 4653 se->valid_blocks; 4654 } 4655 start_blk += readed; 4656 } while (start_blk < sit_blk_cnt); 4657 4658 down_read(&curseg->journal_rwsem); 4659 for (i = 0; i < sits_in_cursum(journal); i++) { 4660 unsigned int old_valid_blocks; 4661 4662 start = le32_to_cpu(segno_in_journal(journal, i)); 4663 if (start >= MAIN_SEGS(sbi)) { 4664 f2fs_err(sbi, "Wrong journal entry on segno %u", 4665 start); 4666 err = -EFSCORRUPTED; 4667 break; 4668 } 4669 4670 se = &sit_i->sentries[start]; 4671 sit = sit_in_journal(journal, i); 4672 4673 old_valid_blocks = se->valid_blocks; 4674 4675 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks; 4676 4677 err = check_block_count(sbi, start, &sit); 4678 if (err) 4679 break; 4680 seg_info_from_raw_sit(se, &sit); 4681 4682 if (se->type >= NR_PERSISTENT_LOG) { 4683 f2fs_err(sbi, "Invalid segment type: %u, segno: %u", 4684 se->type, start); 4685 err = -EFSCORRUPTED; 4686 break; 4687 } 4688 4689 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; 4690 4691 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4692 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); 4693 } else { 4694 memcpy(se->discard_map, se->cur_valid_map, 4695 SIT_VBLOCK_MAP_SIZE); 4696 sbi->discard_blks += old_valid_blocks; 4697 sbi->discard_blks -= se->valid_blocks; 4698 } 4699 4700 if (__is_large_section(sbi)) { 4701 get_sec_entry(sbi, start)->valid_blocks += 4702 se->valid_blocks; 4703 get_sec_entry(sbi, start)->valid_blocks -= 4704 old_valid_blocks; 4705 } 4706 } 4707 up_read(&curseg->journal_rwsem); 4708 4709 if (err) 4710 return err; 4711 4712 if (sit_valid_blocks[NODE] != valid_node_count(sbi)) { 4713 f2fs_err(sbi, "SIT is corrupted node# %u vs %u", 4714 sit_valid_blocks[NODE], valid_node_count(sbi)); 4715 return -EFSCORRUPTED; 4716 } 4717 4718 if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] > 4719 valid_user_blocks(sbi)) { 4720 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u", 4721 sit_valid_blocks[DATA], sit_valid_blocks[NODE], 4722 valid_user_blocks(sbi)); 4723 return -EFSCORRUPTED; 4724 } 4725 4726 return 0; 4727} 4728 4729static void init_free_segmap(struct f2fs_sb_info *sbi) 4730{ 4731 unsigned int start; 4732 int type; 4733 struct seg_entry *sentry; 4734 4735 for (start = 0; start < MAIN_SEGS(sbi); start++) { 4736 if (f2fs_usable_blks_in_seg(sbi, start) == 0) 4737 continue; 4738 sentry = get_seg_entry(sbi, start); 4739 if (!sentry->valid_blocks) 4740 __set_free(sbi, start); 4741 else 4742 SIT_I(sbi)->written_valid_blocks += 4743 sentry->valid_blocks; 4744 } 4745 4746 /* set use the current segments */ 4747 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { 4748 struct curseg_info *curseg_t = CURSEG_I(sbi, type); 4749 
__set_test_and_inuse(sbi, curseg_t->segno); 4750 } 4751} 4752 4753static void init_dirty_segmap(struct f2fs_sb_info *sbi) 4754{ 4755 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4756 struct free_segmap_info *free_i = FREE_I(sbi); 4757 unsigned int segno = 0, offset = 0, secno; 4758 block_t valid_blocks, usable_blks_in_seg; 4759 block_t blks_per_sec = BLKS_PER_SEC(sbi); 4760 4761 while (1) { 4762 /* find dirty segment based on free segmap */ 4763 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); 4764 if (segno >= MAIN_SEGS(sbi)) 4765 break; 4766 offset = segno + 1; 4767 valid_blocks = get_valid_blocks(sbi, segno, false); 4768 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno); 4769 if (valid_blocks == usable_blks_in_seg || !valid_blocks) 4770 continue; 4771 if (valid_blocks > usable_blks_in_seg) { 4772 f2fs_bug_on(sbi, 1); 4773 continue; 4774 } 4775 mutex_lock(&dirty_i->seglist_lock); 4776 __locate_dirty_segment(sbi, segno, DIRTY); 4777 mutex_unlock(&dirty_i->seglist_lock); 4778 } 4779 4780 if (!__is_large_section(sbi)) 4781 return; 4782 4783 mutex_lock(&dirty_i->seglist_lock); 4784 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 4785 valid_blocks = get_valid_blocks(sbi, segno, true); 4786 secno = GET_SEC_FROM_SEG(sbi, segno); 4787 4788 if (!valid_blocks || valid_blocks == blks_per_sec) 4789 continue; 4790 if (IS_CURSEC(sbi, secno)) 4791 continue; 4792 set_bit(secno, dirty_i->dirty_secmap); 4793 } 4794 mutex_unlock(&dirty_i->seglist_lock); 4795} 4796 4797static int init_victim_secmap(struct f2fs_sb_info *sbi) 4798{ 4799 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4800 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4801 4802 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); 4803 if (!dirty_i->victim_secmap) 4804 return -ENOMEM; 4805 return 0; 4806} 4807 4808static int build_dirty_segmap(struct f2fs_sb_info *sbi) 4809{ 4810 struct dirty_seglist_info *dirty_i; 4811 unsigned int bitmap_size, i; 4812 4813 /* allocate memory for dirty segments list information */ 4814 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), 4815 GFP_KERNEL); 4816 if (!dirty_i) 4817 return -ENOMEM; 4818 4819 SM_I(sbi)->dirty_info = dirty_i; 4820 mutex_init(&dirty_i->seglist_lock); 4821 4822 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4823 4824 for (i = 0; i < NR_DIRTY_TYPE; i++) { 4825 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, 4826 GFP_KERNEL); 4827 if (!dirty_i->dirty_segmap[i]) 4828 return -ENOMEM; 4829 } 4830 4831 if (__is_large_section(sbi)) { 4832 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4833 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, 4834 bitmap_size, GFP_KERNEL); 4835 if (!dirty_i->dirty_secmap) 4836 return -ENOMEM; 4837 } 4838 4839 init_dirty_segmap(sbi); 4840 return init_victim_secmap(sbi); 4841} 4842 4843static int sanity_check_curseg(struct f2fs_sb_info *sbi) 4844{ 4845 int i; 4846 4847 /* 4848 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; 4849 * In LFS curseg, all blkaddr after .next_blkoff should be unused. 
4850 */
4851 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4852 struct curseg_info *curseg = CURSEG_I(sbi, i);
4853 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4854 unsigned int blkofs = curseg->next_blkoff;
4855
4856 sanity_check_seg_type(sbi, curseg->seg_type);
4857
4858 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4859 f2fs_err(sbi,
4860 "Current segment has invalid alloc_type:%d",
4861 curseg->alloc_type);
4862 return -EFSCORRUPTED;
4863 }
4864
4865 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4866 goto out;
4867
4868 if (curseg->alloc_type == SSR)
4869 continue;
4870
4871 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4872 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4873 continue;
4874out:
4875 f2fs_err(sbi,
4876 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4877 i, curseg->segno, curseg->alloc_type,
4878 curseg->next_blkoff, blkofs);
4879 return -EFSCORRUPTED;
4880 }
4881 }
4882 return 0;
4883}
4884
4885#ifdef CONFIG_BLK_DEV_ZONED
4886
4887static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4888 struct f2fs_dev_info *fdev,
4889 struct blk_zone *zone)
4890{
4891 unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4892 block_t zone_block, wp_block, last_valid_block;
4893 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4894 int i, s, b, ret;
4895 struct seg_entry *se;
4896
4897 if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4898 return 0;
4899
4900 wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4901 wp_segno = GET_SEGNO(sbi, wp_block);
4902 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4903 zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4904 zone_segno = GET_SEGNO(sbi, zone_block);
4905 zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4906
4907 if (zone_segno >= MAIN_SEGS(sbi))
4908 return 0;
4909
4910 /*
4911 * Skip the check for zones that cursegs point to, since
4912 * fix_curseg_write_pointer() checks them.
4913 */
4914 for (i = 0; i < NO_CHECK_TYPE; i++)
4915 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4916 CURSEG_I(sbi, i)->segno))
4917 return 0;
4918
4919 /*
4920 * Get last valid block of the zone.
4921 */
4922 last_valid_block = zone_block - 1;
4923 for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4924 segno = zone_segno + s;
4925 se = get_seg_entry(sbi, segno);
4926 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4927 if (f2fs_test_bit(b, se->cur_valid_map)) {
4928 last_valid_block = START_BLOCK(sbi, segno) + b;
4929 break;
4930 }
4931 if (last_valid_block >= zone_block)
4932 break;
4933 }
4934
4935 /*
4936 * If last valid block is beyond the write pointer, report the
4937 * inconsistency. This inconsistency does not cause a write error
4938 * because the zone will not be selected for write operations until
4939 * it gets discarded. Just report it.
4940 */
4941 if (last_valid_block >= wp_block) {
4942 f2fs_notice(sbi, "Valid block beyond write pointer: "
4943 "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4944 GET_SEGNO(sbi, last_valid_block),
4945 GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4946 wp_segno, wp_blkoff);
4947 return 0;
4948 }
4949
4950 /*
4951 * If there is no valid block in the zone and the write pointer is
4952 * not at the zone start, reset the write pointer.
4953 */
4954 if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
4955 f2fs_notice(sbi,
4956 "Zone without valid block has non-zero write "
4957 "pointer. 
Reset the write pointer: wp[0x%x,0x%x]", 4958 wp_segno, wp_blkoff); 4959 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, 4960 zone->len >> log_sectors_per_block); 4961 if (ret) { 4962 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 4963 fdev->path, ret); 4964 return ret; 4965 } 4966 } 4967 4968 return 0; 4969} 4970 4971static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, 4972 block_t zone_blkaddr) 4973{ 4974 int i; 4975 4976 for (i = 0; i < sbi->s_ndevs; i++) { 4977 if (!bdev_is_zoned(FDEV(i).bdev)) 4978 continue; 4979 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr && 4980 zone_blkaddr <= FDEV(i).end_blk)) 4981 return &FDEV(i); 4982 } 4983 4984 return NULL; 4985} 4986 4987static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, 4988 void *data) { 4989 memcpy(data, zone, sizeof(struct blk_zone)); 4990 return 0; 4991} 4992 4993static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) 4994{ 4995 struct curseg_info *cs = CURSEG_I(sbi, type); 4996 struct f2fs_dev_info *zbd; 4997 struct blk_zone zone; 4998 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; 4999 block_t cs_zone_block, wp_block; 5000 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; 5001 sector_t zone_sector; 5002 int err; 5003 5004 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 5005 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 5006 5007 zbd = get_target_zoned_dev(sbi, cs_zone_block); 5008 if (!zbd) 5009 return 0; 5010 5011 /* report zone for the sector the curseg points to */ 5012 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 5013 << log_sectors_per_block; 5014 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 5015 report_one_zone_cb, &zone); 5016 if (err != 1) { 5017 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 5018 zbd->path, err); 5019 return err; 5020 } 5021 5022 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 5023 return 0; 5024 5025 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); 5026 wp_segno = GET_SEGNO(sbi, wp_block); 5027 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); 5028 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); 5029 5030 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && 5031 wp_sector_off == 0) 5032 return 0; 5033 5034 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " 5035 "curseg[0x%x,0x%x] wp[0x%x,0x%x]", 5036 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); 5037 5038 f2fs_notice(sbi, "Assign new section to curseg[%d]: " 5039 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); 5040 allocate_segment_by_default(sbi, type, true, SEQ_NONE); 5041 5042 /* check consistency of the zone curseg pointed to */ 5043 if (check_zone_write_pointer(sbi, zbd, &zone)) 5044 return -EIO; 5045 5046 /* check newly assigned zone */ 5047 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 5048 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 5049 5050 zbd = get_target_zoned_dev(sbi, cs_zone_block); 5051 if (!zbd) 5052 return 0; 5053 5054 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 5055 << log_sectors_per_block; 5056 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 5057 report_one_zone_cb, &zone); 5058 if (err != 1) { 5059 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 5060 zbd->path, err); 5061 return err; 5062 } 5063 5064 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 5065 return 0; 5066 5067 if (zone.wp != zone.start) { 5068 f2fs_notice(sbi, 5069 "New zone for curseg[%d] is not 
yet discarded. " 5070 "Reset the zone: curseg[0x%x,0x%x]", 5071 type, cs->segno, cs->next_blkoff); 5072 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, 5073 zone_sector >> log_sectors_per_block, 5074 zone.len >> log_sectors_per_block); 5075 if (err) { 5076 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 5077 zbd->path, err); 5078 return err; 5079 } 5080 } 5081 5082 return 0; 5083} 5084 5085int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 5086{ 5087 int i, ret; 5088 5089 for (i = 0; i < NR_PERSISTENT_LOG; i++) { 5090 ret = fix_curseg_write_pointer(sbi, i); 5091 if (ret) 5092 return ret; 5093 } 5094 5095 return 0; 5096} 5097 5098struct check_zone_write_pointer_args { 5099 struct f2fs_sb_info *sbi; 5100 struct f2fs_dev_info *fdev; 5101}; 5102 5103static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, 5104 void *data) { 5105 struct check_zone_write_pointer_args *args; 5106 args = (struct check_zone_write_pointer_args *)data; 5107 5108 return check_zone_write_pointer(args->sbi, args->fdev, zone); 5109} 5110 5111int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 5112{ 5113 int i, ret; 5114 struct check_zone_write_pointer_args args; 5115 5116 for (i = 0; i < sbi->s_ndevs; i++) { 5117 if (!bdev_is_zoned(FDEV(i).bdev)) 5118 continue; 5119 5120 args.sbi = sbi; 5121 args.fdev = &FDEV(i); 5122 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES, 5123 check_zone_write_pointer_cb, &args); 5124 if (ret < 0) 5125 return ret; 5126 } 5127 5128 return 0; 5129} 5130 5131/* 5132 * Return the number of usable blocks in a segment. The number of blocks 5133 * returned is always equal to the number of blocks in a segment for 5134 * segments fully contained within a sequential zone capacity or a 5135 * conventional zone. For segments partially contained in a sequential 5136 * zone capacity, the number of usable blocks up to the zone capacity 5137 * is returned. 0 is returned in all other cases. 5138 */ 5139static inline unsigned int f2fs_usable_zone_blks_in_seg( 5140 struct f2fs_sb_info *sbi, unsigned int segno) 5141{ 5142 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr; 5143 unsigned int secno; 5144 5145 if (!sbi->unusable_blocks_per_sec) 5146 return sbi->blocks_per_seg; 5147 5148 secno = GET_SEC_FROM_SEG(sbi, segno); 5149 seg_start = START_BLOCK(sbi, segno); 5150 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); 5151 sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); 5152 5153 /* 5154 * If segment starts before zone capacity and spans beyond 5155 * zone capacity, then usable blocks are from seg start to 5156 * zone capacity. If the segment starts after the zone capacity, 5157 * then there are no usable blocks. 
5158 */ 5159 if (seg_start >= sec_cap_blkaddr) 5160 return 0; 5161 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) 5162 return sec_cap_blkaddr - seg_start; 5163 5164 return sbi->blocks_per_seg; 5165} 5166#else 5167int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 5168{ 5169 return 0; 5170} 5171 5172int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 5173{ 5174 return 0; 5175} 5176 5177static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi, 5178 unsigned int segno) 5179{ 5180 return 0; 5181} 5182 5183#endif 5184unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, 5185 unsigned int segno) 5186{ 5187 if (f2fs_sb_has_blkzoned(sbi)) 5188 return f2fs_usable_zone_blks_in_seg(sbi, segno); 5189 5190 return sbi->blocks_per_seg; 5191} 5192 5193unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, 5194 unsigned int segno) 5195{ 5196 if (f2fs_sb_has_blkzoned(sbi)) 5197 return CAP_SEGS_PER_SEC(sbi); 5198 5199 return sbi->segs_per_sec; 5200} 5201 5202/* 5203 * Update min, max modified time for cost-benefit GC algorithm 5204 */ 5205static void init_min_max_mtime(struct f2fs_sb_info *sbi) 5206{ 5207 struct sit_info *sit_i = SIT_I(sbi); 5208 unsigned int segno; 5209 5210 down_write(&sit_i->sentry_lock); 5211 5212 sit_i->min_mtime = ULLONG_MAX; 5213 5214 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 5215 unsigned int i; 5216 unsigned long long mtime = 0; 5217 5218 for (i = 0; i < sbi->segs_per_sec; i++) 5219 mtime += get_seg_entry(sbi, segno + i)->mtime; 5220 5221 mtime = div_u64(mtime, sbi->segs_per_sec); 5222 5223 if (sit_i->min_mtime > mtime) 5224 sit_i->min_mtime = mtime; 5225 } 5226 sit_i->max_mtime = get_mtime(sbi, false); 5227 sit_i->dirty_max_mtime = 0; 5228 up_write(&sit_i->sentry_lock); 5229} 5230 5231int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) 5232{ 5233 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 5234 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 5235 struct f2fs_sm_info *sm_info; 5236 int err; 5237 5238 sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); 5239 if (!sm_info) 5240 return -ENOMEM; 5241 5242 /* init sm info */ 5243 sbi->sm_info = sm_info; 5244 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 5245 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 5246 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 5247 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); 5248 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 5249 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 5250 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 5251 sm_info->rec_prefree_segments = sm_info->main_segments * 5252 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 5253 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) 5254 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; 5255 5256 if (!f2fs_lfs_mode(sbi)) 5257 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 5258 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 5259 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 5260 sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec; 5261 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 5262 sm_info->min_ssr_sections = reserved_sections(sbi); 5263 5264 INIT_LIST_HEAD(&sm_info->sit_entry_set); 5265 5266 init_rwsem(&sm_info->curseg_lock); 5267 5268 if (!f2fs_readonly(sbi->sb)) { 5269 err = f2fs_create_flush_cmd_control(sbi); 5270 if (err) 5271 return err; 5272 } 5273 5274 
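/* discard command control is set up unconditionally, unlike the flush control above which is skipped for read-only mounts */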
err = create_discard_cmd_control(sbi); 5275 if (err) 5276 return err; 5277 5278 err = build_sit_info(sbi); 5279 if (err) 5280 return err; 5281 err = build_free_segmap(sbi); 5282 if (err) 5283 return err; 5284 err = build_curseg(sbi); 5285 if (err) 5286 return err; 5287 5288 /* reinit free segmap based on SIT */ 5289 err = build_sit_entries(sbi); 5290 if (err) 5291 return err; 5292 5293 init_free_segmap(sbi); 5294 err = build_dirty_segmap(sbi); 5295 if (err) 5296 return err; 5297 5298 err = sanity_check_curseg(sbi); 5299 if (err) 5300 return err; 5301 5302 init_min_max_mtime(sbi); 5303 return 0; 5304} 5305 5306static void discard_dirty_segmap(struct f2fs_sb_info *sbi, 5307 enum dirty_type dirty_type) 5308{ 5309 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5310 5311 mutex_lock(&dirty_i->seglist_lock); 5312 kvfree(dirty_i->dirty_segmap[dirty_type]); 5313 dirty_i->nr_dirty[dirty_type] = 0; 5314 mutex_unlock(&dirty_i->seglist_lock); 5315} 5316 5317static void destroy_victim_secmap(struct f2fs_sb_info *sbi) 5318{ 5319 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5320 kvfree(dirty_i->victim_secmap); 5321} 5322 5323static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) 5324{ 5325 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5326 int i; 5327 5328 if (!dirty_i) 5329 return; 5330 5331 /* discard pre-free/dirty segments list */ 5332 for (i = 0; i < NR_DIRTY_TYPE; i++) 5333 discard_dirty_segmap(sbi, i); 5334 5335 if (__is_large_section(sbi)) { 5336 mutex_lock(&dirty_i->seglist_lock); 5337 kvfree(dirty_i->dirty_secmap); 5338 mutex_unlock(&dirty_i->seglist_lock); 5339 } 5340 5341 destroy_victim_secmap(sbi); 5342 SM_I(sbi)->dirty_info = NULL; 5343 kfree(dirty_i); 5344} 5345 5346static void destroy_curseg(struct f2fs_sb_info *sbi) 5347{ 5348 struct curseg_info *array = SM_I(sbi)->curseg_array; 5349 int i; 5350 5351 if (!array) 5352 return; 5353 SM_I(sbi)->curseg_array = NULL; 5354 for (i = 0; i < NR_CURSEG_TYPE; i++) { 5355 kfree(array[i].sum_blk); 5356 kfree(array[i].journal); 5357 } 5358 kfree(array); 5359} 5360 5361static void destroy_free_segmap(struct f2fs_sb_info *sbi) 5362{ 5363 struct free_segmap_info *free_i = SM_I(sbi)->free_info; 5364 if (!free_i) 5365 return; 5366 SM_I(sbi)->free_info = NULL; 5367 kvfree(free_i->free_segmap); 5368 kvfree(free_i->free_secmap); 5369 kfree(free_i); 5370} 5371 5372static void destroy_sit_info(struct f2fs_sb_info *sbi) 5373{ 5374 struct sit_info *sit_i = SIT_I(sbi); 5375 5376 if (!sit_i) 5377 return; 5378 5379 if (sit_i->sentries) 5380 kvfree(sit_i->bitmap); 5381 kfree(sit_i->tmp_map); 5382 5383 kvfree(sit_i->sentries); 5384 kvfree(sit_i->sec_entries); 5385 kvfree(sit_i->dirty_sentries_bitmap); 5386 5387 SM_I(sbi)->sit_info = NULL; 5388 kvfree(sit_i->sit_bitmap); 5389#ifdef CONFIG_F2FS_CHECK_FS 5390 kvfree(sit_i->sit_bitmap_mir); 5391 kvfree(sit_i->invalid_segmap); 5392#endif 5393 kfree(sit_i); 5394} 5395 5396void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) 5397{ 5398 struct f2fs_sm_info *sm_info = SM_I(sbi); 5399 5400 if (!sm_info) 5401 return; 5402 f2fs_destroy_flush_cmd_control(sbi, true); 5403 destroy_discard_cmd_control(sbi); 5404 destroy_dirty_segmap(sbi); 5405 destroy_curseg(sbi); 5406 destroy_free_segmap(sbi); 5407 destroy_sit_info(sbi); 5408 sbi->sm_info = NULL; 5409 kfree(sm_info); 5410} 5411 5412int __init f2fs_create_segment_manager_caches(void) 5413{ 5414 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", 5415 sizeof(struct discard_entry)); 5416 if (!discard_entry_slab) 5417 goto fail; 5418 5419 
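/* the remaining caches back discard commands, SIT entry sets and in-memory page entries */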
discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", 5420 sizeof(struct discard_cmd)); 5421 if (!discard_cmd_slab) 5422 goto destroy_discard_entry; 5423 5424 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", 5425 sizeof(struct sit_entry_set)); 5426 if (!sit_entry_set_slab) 5427 goto destroy_discard_cmd; 5428 5429 inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", 5430 sizeof(struct inmem_pages)); 5431 if (!inmem_entry_slab) 5432 goto destroy_sit_entry_set; 5433 return 0; 5434 5435destroy_sit_entry_set: 5436 kmem_cache_destroy(sit_entry_set_slab); 5437destroy_discard_cmd: 5438 kmem_cache_destroy(discard_cmd_slab); 5439destroy_discard_entry: 5440 kmem_cache_destroy(discard_entry_slab); 5441fail: 5442 return -ENOMEM; 5443} 5444 5445void f2fs_destroy_segment_manager_caches(void) 5446{ 5447 kmem_cache_destroy(sit_entry_set_slab); 5448 kmem_cache_destroy(discard_cmd_slab); 5449 kmem_cache_destroy(discard_entry_slab); 5450 kmem_cache_destroy(inmem_entry_slab); 5451} 5452