162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * fs/hmdfs/client_writeback.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/backing-dev.h> 962306a36Sopenharmony_ci#include <linux/file.h> 1062306a36Sopenharmony_ci#include <linux/fs.h> 1162306a36Sopenharmony_ci#include <linux/page-flags.h> 1262306a36Sopenharmony_ci#include <linux/pagemap.h> 1362306a36Sopenharmony_ci#include <linux/pagevec.h> 1462306a36Sopenharmony_ci#include <linux/sched/signal.h> 1562306a36Sopenharmony_ci#include <linux/slab.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include "hmdfs.h" 1862306a36Sopenharmony_ci#include "hmdfs_trace.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci/* 200ms */ 2162306a36Sopenharmony_ci#define HMDFS_MAX_PAUSE max((HZ / 5), 1) 2262306a36Sopenharmony_ci#define HMDFS_BANDWIDTH_INTERVAL max((HZ / 5), 1) 2362306a36Sopenharmony_ci/* Dirty type */ 2462306a36Sopenharmony_ci#define HMDFS_DIRTY_FS 0 2562306a36Sopenharmony_ci#define HMDFS_DIRTY_FILE 1 2662306a36Sopenharmony_ci/* Exceed flags */ 2762306a36Sopenharmony_ci#define HMDFS_FS_EXCEED (1 << HMDFS_DIRTY_FS) 2862306a36Sopenharmony_ci#define HMDFS_FILE_EXCEED (1 << HMDFS_DIRTY_FILE) 2962306a36Sopenharmony_ci/* Ratelimit calculate shift */ 3062306a36Sopenharmony_ci#define HMDFS_LIMIT_SHIFT 10 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_civoid hmdfs_writeback_inodes_sb_handler(struct work_struct *work) 3362306a36Sopenharmony_ci{ 3462306a36Sopenharmony_ci struct hmdfs_writeback *hwb = container_of( 3562306a36Sopenharmony_ci work, struct hmdfs_writeback, dirty_sb_writeback_work.work); 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci try_to_writeback_inodes_sb(hwb->sbi->sb, WB_REASON_FS_FREE_SPACE); 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_civoid hmdfs_writeback_inode_handler(struct work_struct *work) 4162306a36Sopenharmony_ci{ 4262306a36Sopenharmony_ci struct hmdfs_inode_info *info = NULL; 4362306a36Sopenharmony_ci struct inode *inode = NULL; 4462306a36Sopenharmony_ci struct hmdfs_writeback *hwb = container_of( 4562306a36Sopenharmony_ci work, struct hmdfs_writeback, dirty_inode_writeback_work.work); 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci spin_lock(&hwb->inode_list_lock); 4862306a36Sopenharmony_ci while (likely(!list_empty(&hwb->inode_list_head))) { 4962306a36Sopenharmony_ci info = list_first_entry(&hwb->inode_list_head, 5062306a36Sopenharmony_ci struct hmdfs_inode_info, wb_list); 5162306a36Sopenharmony_ci list_del_init(&info->wb_list); 5262306a36Sopenharmony_ci spin_unlock(&hwb->inode_list_lock); 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci inode = &info->vfs_inode; 5562306a36Sopenharmony_ci write_inode_now(inode, 0); 5662306a36Sopenharmony_ci iput(inode); 5762306a36Sopenharmony_ci spin_lock(&hwb->inode_list_lock); 5862306a36Sopenharmony_ci } 5962306a36Sopenharmony_ci spin_unlock(&hwb->inode_list_lock); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic void hmdfs_writeback_inodes_sb_delayed(struct super_block *sb, 6362306a36Sopenharmony_ci unsigned int delay) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci struct hmdfs_sb_info *sbi = sb->s_fs_info; 6662306a36Sopenharmony_ci unsigned long timeout; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci timeout = msecs_to_jiffies(delay); 6962306a36Sopenharmony_ci if (!timeout || !work_busy(&sbi->h_wb->dirty_sb_writeback_work.work)) 7062306a36Sopenharmony_ci mod_delayed_work(sbi->h_wb->dirty_sb_writeback_wq, 7162306a36Sopenharmony_ci &sbi->h_wb->dirty_sb_writeback_work, timeout); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistatic inline void hmdfs_writeback_inodes_sb(struct super_block *sb) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci hmdfs_writeback_inodes_sb_delayed(sb, 0); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistatic void hmdfs_writeback_inode(struct super_block *sb, struct inode *inode) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci struct hmdfs_sb_info *sbi = sb->s_fs_info; 8262306a36Sopenharmony_ci struct hmdfs_writeback *hwb = sbi->h_wb; 8362306a36Sopenharmony_ci struct hmdfs_inode_info *info = hmdfs_i(inode); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci spin_lock(&hwb->inode_list_lock); 8662306a36Sopenharmony_ci if (list_empty(&info->wb_list)) { 8762306a36Sopenharmony_ci ihold(inode); 8862306a36Sopenharmony_ci list_add_tail(&info->wb_list, &hwb->inode_list_head); 8962306a36Sopenharmony_ci queue_delayed_work(hwb->dirty_inode_writeback_wq, 9062306a36Sopenharmony_ci &hwb->dirty_inode_writeback_work, 0); 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci spin_unlock(&hwb->inode_list_lock); 9362306a36Sopenharmony_ci} 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_cistatic unsigned long hmdfs_idirty_pages(struct inode *inode, int tag) 9662306a36Sopenharmony_ci{ 9762306a36Sopenharmony_ci#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE 9862306a36Sopenharmony_ci struct folio_batch fbatch; 9962306a36Sopenharmony_ci#else 10062306a36Sopenharmony_ci struct pagevec pvec; 10162306a36Sopenharmony_ci#endif 10262306a36Sopenharmony_ci unsigned long nr_dirty_pages = 0; 10362306a36Sopenharmony_ci pgoff_t index = 0; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE 10662306a36Sopenharmony_ci#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE 10762306a36Sopenharmony_ci folio_batch_init(&fbatch); 10862306a36Sopenharmony_ci#else 10962306a36Sopenharmony_ci pagevec_init(&pvec); 11062306a36Sopenharmony_ci#endif 11162306a36Sopenharmony_ci#else 11262306a36Sopenharmony_ci pagevec_init(&pvec, 0); 11362306a36Sopenharmony_ci#endif 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE 11662306a36Sopenharmony_ci while (filemap_get_folios_tag(inode->i_mapping, &index, 11762306a36Sopenharmony_ci (pgoff_t)-1, tag, &fbatch)) { 11862306a36Sopenharmony_ci for (int i = 0; i < fbatch.nr; i++) { 11962306a36Sopenharmony_ci struct folio *folio = fbatch.folios[i]; 12062306a36Sopenharmony_ci if (folio_test_dirty(folio) || folio_test_writeback(folio)) { 12162306a36Sopenharmony_ci nr_dirty_pages++; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci folio_batch_release(&fbatch); 12562306a36Sopenharmony_ci cond_resched(); 12662306a36Sopenharmony_ci } 12762306a36Sopenharmony_ci#else 12862306a36Sopenharmony_ci while (pagevec_lookup_tag(&pvec, inode->i_mapping, &index, tag)) { 12962306a36Sopenharmony_ci nr_dirty_pages += pagevec_count(&pvec); 13062306a36Sopenharmony_ci pagevec_release(&pvec); 13162306a36Sopenharmony_ci cond_resched(); 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci#endif 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci return nr_dirty_pages; 13662306a36Sopenharmony_ci} 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_cistatic inline unsigned long hmdfs_ratio_thresh(unsigned long ratio, 13962306a36Sopenharmony_ci unsigned long thresh) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci unsigned long ret = (ratio * thresh) >> HMDFS_LIMIT_SHIFT; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci return (ret == 0) ? 1 : ret; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cistatic inline unsigned long hmdfs_thresh_ratio(unsigned long base, 14762306a36Sopenharmony_ci unsigned long thresh) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci unsigned long ratio = (base << HMDFS_LIMIT_SHIFT) / thresh; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci return (ratio == 0) ? 1 : ratio; 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_civoid hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb) 15562306a36Sopenharmony_ci{ 15662306a36Sopenharmony_ci hwb->dirty_fs_thresh = DIV_ROUND_UP(hwb->dirty_fs_bytes, PAGE_SIZE); 15762306a36Sopenharmony_ci hwb->dirty_file_thresh = DIV_ROUND_UP(hwb->dirty_file_bytes, PAGE_SIZE); 15862306a36Sopenharmony_ci hwb->dirty_fs_bg_thresh = 15962306a36Sopenharmony_ci DIV_ROUND_UP(hwb->dirty_fs_bg_bytes, PAGE_SIZE); 16062306a36Sopenharmony_ci hwb->dirty_file_bg_thresh = 16162306a36Sopenharmony_ci DIV_ROUND_UP(hwb->dirty_file_bg_bytes, PAGE_SIZE); 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci hwb->fs_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_fs_bg_thresh, 16462306a36Sopenharmony_ci hwb->dirty_fs_thresh); 16562306a36Sopenharmony_ci hwb->file_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_file_bg_thresh, 16662306a36Sopenharmony_ci hwb->dirty_file_thresh); 16762306a36Sopenharmony_ci hwb->fs_file_ratio = hmdfs_thresh_ratio(hwb->dirty_file_thresh, 16862306a36Sopenharmony_ci hwb->dirty_fs_thresh); 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_cistatic void hmdfs_init_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci struct hmdfs_writeback *hwb = hdtc->hwb; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci hdtc->fs_thresh = hdtc->hwb->dirty_fs_thresh; 17662306a36Sopenharmony_ci hdtc->file_thresh = hdtc->hwb->dirty_file_thresh; 17762306a36Sopenharmony_ci hdtc->fs_bg_thresh = hdtc->hwb->dirty_fs_bg_thresh; 17862306a36Sopenharmony_ci hdtc->file_bg_thresh = hdtc->hwb->dirty_file_bg_thresh; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci if (!hwb->dirty_auto_threshold) 18162306a36Sopenharmony_ci return; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci /* 18462306a36Sopenharmony_ci * Init thresh according the previous bandwidth adjusted thresh, 18562306a36Sopenharmony_ci * thresh should be no more than setting thresh. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_ci if (hwb->bw_fs_thresh < hdtc->fs_thresh) { 18862306a36Sopenharmony_ci hdtc->fs_thresh = hwb->bw_fs_thresh; 18962306a36Sopenharmony_ci hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio, 19062306a36Sopenharmony_ci hdtc->fs_thresh); 19162306a36Sopenharmony_ci } 19262306a36Sopenharmony_ci if (hwb->bw_file_thresh < hdtc->file_thresh) { 19362306a36Sopenharmony_ci hdtc->file_thresh = hwb->bw_file_thresh; 19462306a36Sopenharmony_ci hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio, 19562306a36Sopenharmony_ci hdtc->file_thresh); 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci /* 19862306a36Sopenharmony_ci * The thresh should be updated in the first time of dirty pages 19962306a36Sopenharmony_ci * exceed the freerun ceiling. 20062306a36Sopenharmony_ci */ 20162306a36Sopenharmony_ci hdtc->thresh_time_stamp = jiffies - HMDFS_BANDWIDTH_INTERVAL - 1; 20262306a36Sopenharmony_ci} 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_cistatic void hmdfs_update_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci struct hmdfs_writeback *hwb = hdtc->hwb; 20762306a36Sopenharmony_ci struct bdi_writeback *wb = hwb->wb; 20862306a36Sopenharmony_ci unsigned int time_limit = hwb->writeback_timelimit; 20962306a36Sopenharmony_ci unsigned long bw = wb->avg_write_bandwidth; 21062306a36Sopenharmony_ci unsigned long thresh; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci if (!hwb->dirty_auto_threshold) 21362306a36Sopenharmony_ci return; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci spin_lock(&hwb->write_bandwidth_lock); 21662306a36Sopenharmony_ci if (bw > hwb->max_write_bandwidth) 21762306a36Sopenharmony_ci hwb->max_write_bandwidth = bw; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci if (bw < hwb->min_write_bandwidth) 22062306a36Sopenharmony_ci hwb->min_write_bandwidth = bw; 22162306a36Sopenharmony_ci hwb->avg_write_bandwidth = bw; 22262306a36Sopenharmony_ci spin_unlock(&hwb->write_bandwidth_lock); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* 22562306a36Sopenharmony_ci * If the bandwidth is lower than the lower limit, it may propably 22662306a36Sopenharmony_ci * offline, there is meaningless to set such a lower thresh. 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ci bw = max(bw, hwb->bw_thresh_lowerlimit); 22962306a36Sopenharmony_ci thresh = bw * time_limit / roundup_pow_of_two(HZ); 23062306a36Sopenharmony_ci if (thresh >= hwb->dirty_fs_thresh) { 23162306a36Sopenharmony_ci hdtc->fs_thresh = hwb->dirty_fs_thresh; 23262306a36Sopenharmony_ci hdtc->file_thresh = hwb->dirty_file_thresh; 23362306a36Sopenharmony_ci hdtc->fs_bg_thresh = hwb->dirty_fs_bg_thresh; 23462306a36Sopenharmony_ci hdtc->file_bg_thresh = hwb->dirty_file_bg_thresh; 23562306a36Sopenharmony_ci } else { 23662306a36Sopenharmony_ci /* Adjust thresh according to current bandwidth */ 23762306a36Sopenharmony_ci hdtc->fs_thresh = thresh; 23862306a36Sopenharmony_ci hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio, 23962306a36Sopenharmony_ci hdtc->fs_thresh); 24062306a36Sopenharmony_ci hdtc->file_thresh = hmdfs_ratio_thresh(hwb->fs_file_ratio, 24162306a36Sopenharmony_ci hdtc->fs_thresh); 24262306a36Sopenharmony_ci hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio, 24362306a36Sopenharmony_ci hdtc->file_thresh); 24462306a36Sopenharmony_ci } 24562306a36Sopenharmony_ci /* Save bandwidth adjusted thresh */ 24662306a36Sopenharmony_ci hwb->bw_fs_thresh = hdtc->fs_thresh; 24762306a36Sopenharmony_ci hwb->bw_file_thresh = hdtc->file_thresh; 24862306a36Sopenharmony_ci /* Update time stamp */ 24962306a36Sopenharmony_ci hdtc->thresh_time_stamp = jiffies; 25062306a36Sopenharmony_ci} 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_civoid hmdfs_update_ratelimit(struct hmdfs_writeback *hwb) 25362306a36Sopenharmony_ci{ 25462306a36Sopenharmony_ci struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci hmdfs_init_dirty_limit(&hdtc); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci /* hdtc.file_bg_thresh should be the lowest thresh */ 25962306a36Sopenharmony_ci hwb->ratelimit_pages = hdtc.file_bg_thresh / 26062306a36Sopenharmony_ci (num_online_cpus() * HMDFS_RATELIMIT_PAGES_GAP); 26162306a36Sopenharmony_ci if (hwb->ratelimit_pages < HMDFS_MIN_RATELIMIT_PAGES) 26262306a36Sopenharmony_ci hwb->ratelimit_pages = HMDFS_MIN_RATELIMIT_PAGES; 26362306a36Sopenharmony_ci} 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci/* This is a copy of wb_max_pause() */ 26662306a36Sopenharmony_cistatic unsigned long hmdfs_wb_pause(struct bdi_writeback *wb, 26762306a36Sopenharmony_ci unsigned long wb_dirty) 26862306a36Sopenharmony_ci{ 26962306a36Sopenharmony_ci unsigned long bw = wb->avg_write_bandwidth; 27062306a36Sopenharmony_ci unsigned long t; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* 27362306a36Sopenharmony_ci * Limit pause time for small memory systems. If sleeping for too long 27462306a36Sopenharmony_ci * time, a small pool of dirty/writeback pages may go empty and disk go 27562306a36Sopenharmony_ci * idle. 27662306a36Sopenharmony_ci * 27762306a36Sopenharmony_ci * 8 serves as the safety ratio. 27862306a36Sopenharmony_ci */ 27962306a36Sopenharmony_ci t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); 28062306a36Sopenharmony_ci t++; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci return min_t(unsigned long, t, HMDFS_MAX_PAUSE); 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic unsigned long 28662306a36Sopenharmony_cihmdfs_dirty_freerun_ceiling(struct hmdfs_dirty_throttle_control *hdtc, 28762306a36Sopenharmony_ci unsigned int type) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci if (type == HMDFS_DIRTY_FS) 29062306a36Sopenharmony_ci return (hdtc->fs_thresh + hdtc->fs_bg_thresh) / 2; 29162306a36Sopenharmony_ci else /* HMDFS_DIRTY_FILE_TYPE */ 29262306a36Sopenharmony_ci return (hdtc->file_thresh + hdtc->file_bg_thresh) / 2; 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci/* This is a copy of dirty_poll_interval() */ 29662306a36Sopenharmony_cistatic inline unsigned long hmdfs_dirty_intv(unsigned long dirty, 29762306a36Sopenharmony_ci unsigned long thresh) 29862306a36Sopenharmony_ci{ 29962306a36Sopenharmony_ci if (thresh > dirty) 30062306a36Sopenharmony_ci return 1UL << (ilog2(thresh - dirty) >> 1); 30162306a36Sopenharmony_ci return 1; 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic void hmdfs_balance_dirty_pages(struct address_space *mapping) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci struct inode *inode = mapping->host; 30762306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 30862306a36Sopenharmony_ci struct hmdfs_sb_info *sbi = sb->s_fs_info; 30962306a36Sopenharmony_ci struct hmdfs_writeback *hwb = sbi->h_wb; 31062306a36Sopenharmony_ci struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 31162306a36Sopenharmony_ci struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; 31262306a36Sopenharmony_ci unsigned int dirty_exceeded = 0; 31362306a36Sopenharmony_ci unsigned long start_time = jiffies; 31462306a36Sopenharmony_ci unsigned long pause = 0; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* Add delay work to trigger timeout writeback */ 31762306a36Sopenharmony_ci if (hwb->dirty_writeback_interval != 0) 31862306a36Sopenharmony_ci hmdfs_writeback_inodes_sb_delayed( 31962306a36Sopenharmony_ci sb, hwb->dirty_writeback_interval * 10); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci hmdfs_init_dirty_limit(&hdtc); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci while (1) { 32462306a36Sopenharmony_ci unsigned long exceed = 0; 32562306a36Sopenharmony_ci unsigned long diff; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci /* Per-filesystem overbalance writeback */ 32862306a36Sopenharmony_ci hdtc.fs_nr_dirty = wb_stat_sum(wb, WB_RECLAIMABLE); 32962306a36Sopenharmony_ci hdtc.fs_nr_reclaimable = 33062306a36Sopenharmony_ci hdtc.fs_nr_dirty + wb_stat_sum(wb, WB_WRITEBACK); 33162306a36Sopenharmony_ci if (hdtc.fs_nr_reclaimable < hdtc.file_bg_thresh) { 33262306a36Sopenharmony_ci diff = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, 33362306a36Sopenharmony_ci hdtc.file_thresh); 33462306a36Sopenharmony_ci goto free_running; 33562306a36Sopenharmony_ci } 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* Per-file overbalance writeback */ 33862306a36Sopenharmony_ci hdtc.file_nr_dirty = 33962306a36Sopenharmony_ci hmdfs_idirty_pages(inode, PAGECACHE_TAG_DIRTY); 34062306a36Sopenharmony_ci hdtc.file_nr_reclaimable = 34162306a36Sopenharmony_ci hmdfs_idirty_pages(inode, PAGECACHE_TAG_WRITEBACK) + 34262306a36Sopenharmony_ci hdtc.file_nr_dirty; 34362306a36Sopenharmony_ci if ((hdtc.fs_nr_reclaimable < 34462306a36Sopenharmony_ci hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) && 34562306a36Sopenharmony_ci (hdtc.file_nr_reclaimable < 34662306a36Sopenharmony_ci hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FILE))) { 34762306a36Sopenharmony_ci unsigned long fs_intv, file_intv; 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci fs_intv = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, 35062306a36Sopenharmony_ci hdtc.fs_thresh); 35162306a36Sopenharmony_ci file_intv = hmdfs_dirty_intv(hdtc.file_nr_reclaimable, 35262306a36Sopenharmony_ci hdtc.file_thresh); 35362306a36Sopenharmony_ci diff = min(fs_intv, file_intv); 35462306a36Sopenharmony_cifree_running: 35562306a36Sopenharmony_ci current->nr_dirtied_pause = diff; 35662306a36Sopenharmony_ci current->nr_dirtied = 0; 35762306a36Sopenharmony_ci break; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci if (hdtc.fs_nr_reclaimable >= 36162306a36Sopenharmony_ci hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) { 36262306a36Sopenharmony_ci if (unlikely(!writeback_in_progress(wb))) 36362306a36Sopenharmony_ci hmdfs_writeback_inodes_sb(sb); 36462306a36Sopenharmony_ci } else { 36562306a36Sopenharmony_ci hmdfs_writeback_inode(sb, inode); 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci /* 36962306a36Sopenharmony_ci * If dirty_auto_threshold is enabled, recalculate writeback 37062306a36Sopenharmony_ci * thresh according to current bandwidth. Update bandwidth 37162306a36Sopenharmony_ci * could be better if possible, but wb_update_bandwidth() is 37262306a36Sopenharmony_ci * not exported, so we cannot update bandwidth here, so the 37362306a36Sopenharmony_ci * bandwidth' update will be delayed if writing a lot to a 37462306a36Sopenharmony_ci * single file. 37562306a36Sopenharmony_ci */ 37662306a36Sopenharmony_ci if (hwb->dirty_auto_threshold && 37762306a36Sopenharmony_ci time_is_before_jiffies(hdtc.thresh_time_stamp + 37862306a36Sopenharmony_ci HMDFS_BANDWIDTH_INTERVAL)) 37962306a36Sopenharmony_ci hmdfs_update_dirty_limit(&hdtc); 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci if (unlikely(hdtc.fs_nr_reclaimable >= hdtc.fs_thresh)) 38262306a36Sopenharmony_ci exceed |= HMDFS_FS_EXCEED; 38362306a36Sopenharmony_ci if (unlikely(hdtc.file_nr_reclaimable >= hdtc.file_thresh)) 38462306a36Sopenharmony_ci exceed |= HMDFS_FILE_EXCEED; 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci if (!exceed) { 38762306a36Sopenharmony_ci trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, 38862306a36Sopenharmony_ci 0UL, start_time); 38962306a36Sopenharmony_ci current->nr_dirtied = 0; 39062306a36Sopenharmony_ci break; 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci /* 39362306a36Sopenharmony_ci * Per-file or per-fs reclaimable pages exceed throttle limit, 39462306a36Sopenharmony_ci * sleep pause time and check again. 39562306a36Sopenharmony_ci */ 39662306a36Sopenharmony_ci dirty_exceeded |= exceed; 39762306a36Sopenharmony_ci if (dirty_exceeded && !hwb->dirty_exceeded) 39862306a36Sopenharmony_ci hwb->dirty_exceeded = true; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci /* Pause */ 40162306a36Sopenharmony_ci pause = hmdfs_wb_pause(wb, hdtc.fs_nr_reclaimable); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, pause, 40462306a36Sopenharmony_ci start_time); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci __set_current_state(TASK_KILLABLE); 40762306a36Sopenharmony_ci io_schedule_timeout(pause); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci if (fatal_signal_pending(current)) 41062306a36Sopenharmony_ci break; 41162306a36Sopenharmony_ci } 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci if (!dirty_exceeded && hwb->dirty_exceeded) 41462306a36Sopenharmony_ci hwb->dirty_exceeded = false; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci if (hdtc.fs_nr_reclaimable >= hdtc.fs_bg_thresh) { 41762306a36Sopenharmony_ci if (unlikely(!writeback_in_progress(wb))) 41862306a36Sopenharmony_ci hmdfs_writeback_inodes_sb(sb); 41962306a36Sopenharmony_ci } else if (hdtc.file_nr_reclaimable >= hdtc.file_bg_thresh) { 42062306a36Sopenharmony_ci hmdfs_writeback_inode(sb, inode); 42162306a36Sopenharmony_ci } 42262306a36Sopenharmony_ci} 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_civoid hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; 42762306a36Sopenharmony_ci struct hmdfs_writeback *hwb = sbi->h_wb; 42862306a36Sopenharmony_ci int *bdp_ratelimits = NULL; 42962306a36Sopenharmony_ci int ratelimit; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (!hwb->dirty_writeback_control) 43262306a36Sopenharmony_ci return; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* Add delay work to trigger timeout writeback */ 43562306a36Sopenharmony_ci if (hwb->dirty_writeback_interval != 0) 43662306a36Sopenharmony_ci hmdfs_writeback_inodes_sb_delayed( 43762306a36Sopenharmony_ci mapping->host->i_sb, 43862306a36Sopenharmony_ci hwb->dirty_writeback_interval * 10); 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci ratelimit = current->nr_dirtied_pause; 44162306a36Sopenharmony_ci if (hwb->dirty_exceeded) 44262306a36Sopenharmony_ci ratelimit = min(ratelimit, HMDFS_DIRTY_EXCEED_RATELIMIT); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci /* 44562306a36Sopenharmony_ci * This prevents one CPU to accumulate too many dirtied pages 44662306a36Sopenharmony_ci * without calling into hmdfs_balance_dirty_pages(), which can 44762306a36Sopenharmony_ci * happen when there are 1000+ tasks, all of them start dirtying 44862306a36Sopenharmony_ci * pages at exactly the same time, hence all honoured too large 44962306a36Sopenharmony_ci * initial task->nr_dirtied_pause. 45062306a36Sopenharmony_ci */ 45162306a36Sopenharmony_ci preempt_disable(); 45262306a36Sopenharmony_ci bdp_ratelimits = this_cpu_ptr(hwb->bdp_ratelimits); 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci trace_hmdfs_balance_dirty_pages_ratelimited(sbi, hwb, *bdp_ratelimits); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci if (unlikely(current->nr_dirtied >= ratelimit)) { 45762306a36Sopenharmony_ci *bdp_ratelimits = 0; 45862306a36Sopenharmony_ci } else if (unlikely(*bdp_ratelimits >= hwb->ratelimit_pages)) { 45962306a36Sopenharmony_ci *bdp_ratelimits = 0; 46062306a36Sopenharmony_ci ratelimit = 0; 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci preempt_enable(); 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci if (unlikely(current->nr_dirtied >= ratelimit)) 46562306a36Sopenharmony_ci hmdfs_balance_dirty_pages(mapping); 46662306a36Sopenharmony_ci} 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_civoid hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi) 46962306a36Sopenharmony_ci{ 47062306a36Sopenharmony_ci if (!sbi->h_wb) 47162306a36Sopenharmony_ci return; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci flush_delayed_work(&sbi->h_wb->dirty_sb_writeback_work); 47462306a36Sopenharmony_ci flush_delayed_work(&sbi->h_wb->dirty_inode_writeback_work); 47562306a36Sopenharmony_ci destroy_workqueue(sbi->h_wb->dirty_sb_writeback_wq); 47662306a36Sopenharmony_ci destroy_workqueue(sbi->h_wb->dirty_inode_writeback_wq); 47762306a36Sopenharmony_ci free_percpu(sbi->h_wb->bdp_ratelimits); 47862306a36Sopenharmony_ci kfree(sbi->h_wb); 47962306a36Sopenharmony_ci sbi->h_wb = NULL; 48062306a36Sopenharmony_ci} 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ciint hmdfs_init_writeback(struct hmdfs_sb_info *sbi) 48362306a36Sopenharmony_ci{ 48462306a36Sopenharmony_ci struct hmdfs_writeback *hwb; 48562306a36Sopenharmony_ci char name[HMDFS_WQ_NAME_LEN]; 48662306a36Sopenharmony_ci int ret = -ENOMEM; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci hwb = kzalloc(sizeof(struct hmdfs_writeback), GFP_KERNEL); 48962306a36Sopenharmony_ci if (!hwb) 49062306a36Sopenharmony_ci return ret; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci hwb->sbi = sbi; 49362306a36Sopenharmony_ci hwb->wb = &sbi->sb->s_bdi->wb; 49462306a36Sopenharmony_ci hwb->dirty_writeback_control = true; 49562306a36Sopenharmony_ci hwb->dirty_writeback_interval = HM_DEFAULT_WRITEBACK_INTERVAL; 49662306a36Sopenharmony_ci hwb->dirty_file_bg_bytes = HMDFS_FILE_BG_WB_BYTES; 49762306a36Sopenharmony_ci hwb->dirty_fs_bg_bytes = HMDFS_FS_BG_WB_BYTES; 49862306a36Sopenharmony_ci hwb->dirty_file_bytes = HMDFS_FILE_WB_BYTES; 49962306a36Sopenharmony_ci hwb->dirty_fs_bytes = HMDFS_FS_WB_BYTES; 50062306a36Sopenharmony_ci hmdfs_calculate_dirty_thresh(hwb); 50162306a36Sopenharmony_ci hwb->bw_file_thresh = hwb->dirty_file_thresh; 50262306a36Sopenharmony_ci hwb->bw_fs_thresh = hwb->dirty_fs_thresh; 50362306a36Sopenharmony_ci spin_lock_init(&hwb->inode_list_lock); 50462306a36Sopenharmony_ci INIT_LIST_HEAD(&hwb->inode_list_head); 50562306a36Sopenharmony_ci hwb->dirty_exceeded = false; 50662306a36Sopenharmony_ci hwb->ratelimit_pages = HMDFS_DEF_RATELIMIT_PAGES; 50762306a36Sopenharmony_ci hwb->dirty_auto_threshold = true; 50862306a36Sopenharmony_ci hwb->writeback_timelimit = HMDFS_DEF_WB_TIMELIMIT; 50962306a36Sopenharmony_ci hwb->bw_thresh_lowerlimit = HMDFS_BW_THRESH_DEF_LIMIT; 51062306a36Sopenharmony_ci spin_lock_init(&hwb->write_bandwidth_lock); 51162306a36Sopenharmony_ci hwb->avg_write_bandwidth = 0; 51262306a36Sopenharmony_ci hwb->max_write_bandwidth = 0; 51362306a36Sopenharmony_ci hwb->min_write_bandwidth = ULONG_MAX; 51462306a36Sopenharmony_ci hwb->bdp_ratelimits = alloc_percpu(int); 51562306a36Sopenharmony_ci if (!hwb->bdp_ratelimits) 51662306a36Sopenharmony_ci goto free_hwb; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci snprintf(name, sizeof(name), "dfs_ino_wb%u", sbi->seq); 51962306a36Sopenharmony_ci hwb->dirty_inode_writeback_wq = create_singlethread_workqueue(name); 52062306a36Sopenharmony_ci if (!hwb->dirty_inode_writeback_wq) { 52162306a36Sopenharmony_ci hmdfs_err("Failed to create inode writeback workqueue!"); 52262306a36Sopenharmony_ci goto free_bdp; 52362306a36Sopenharmony_ci } 52462306a36Sopenharmony_ci snprintf(name, sizeof(name), "dfs_sb_wb%u", sbi->seq); 52562306a36Sopenharmony_ci hwb->dirty_sb_writeback_wq = create_singlethread_workqueue(name); 52662306a36Sopenharmony_ci if (!hwb->dirty_sb_writeback_wq) { 52762306a36Sopenharmony_ci hmdfs_err("Failed to create filesystem writeback workqueue!"); 52862306a36Sopenharmony_ci goto free_i_wq; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci INIT_DELAYED_WORK(&hwb->dirty_sb_writeback_work, 53162306a36Sopenharmony_ci hmdfs_writeback_inodes_sb_handler); 53262306a36Sopenharmony_ci INIT_DELAYED_WORK(&hwb->dirty_inode_writeback_work, 53362306a36Sopenharmony_ci hmdfs_writeback_inode_handler); 53462306a36Sopenharmony_ci sbi->h_wb = hwb; 53562306a36Sopenharmony_ci return 0; 53662306a36Sopenharmony_cifree_i_wq: 53762306a36Sopenharmony_ci destroy_workqueue(hwb->dirty_inode_writeback_wq); 53862306a36Sopenharmony_cifree_bdp: 53962306a36Sopenharmony_ci free_percpu(hwb->bdp_ratelimits); 54062306a36Sopenharmony_cifree_hwb: 54162306a36Sopenharmony_ci kfree(hwb); 54262306a36Sopenharmony_ci return ret; 54362306a36Sopenharmony_ci} 544