162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * mm/page-writeback.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2002, Linus Torvalds.
662306a36Sopenharmony_ci * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Contains functions related to writing back dirty pages at the
962306a36Sopenharmony_ci * address_space level.
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * 10Apr2002	Andrew Morton
1262306a36Sopenharmony_ci *		Initial version
1362306a36Sopenharmony_ci */
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <linux/kernel.h>
1662306a36Sopenharmony_ci#include <linux/math64.h>
1762306a36Sopenharmony_ci#include <linux/export.h>
1862306a36Sopenharmony_ci#include <linux/spinlock.h>
1962306a36Sopenharmony_ci#include <linux/fs.h>
2062306a36Sopenharmony_ci#include <linux/mm.h>
2162306a36Sopenharmony_ci#include <linux/swap.h>
2262306a36Sopenharmony_ci#include <linux/slab.h>
2362306a36Sopenharmony_ci#include <linux/pagemap.h>
2462306a36Sopenharmony_ci#include <linux/writeback.h>
2562306a36Sopenharmony_ci#include <linux/init.h>
2662306a36Sopenharmony_ci#include <linux/backing-dev.h>
2762306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
2862306a36Sopenharmony_ci#include <linux/blkdev.h>
2962306a36Sopenharmony_ci#include <linux/mpage.h>
3062306a36Sopenharmony_ci#include <linux/rmap.h>
3162306a36Sopenharmony_ci#include <linux/percpu.h>
3262306a36Sopenharmony_ci#include <linux/smp.h>
3362306a36Sopenharmony_ci#include <linux/sysctl.h>
3462306a36Sopenharmony_ci#include <linux/cpu.h>
3562306a36Sopenharmony_ci#include <linux/syscalls.h>
3662306a36Sopenharmony_ci#include <linux/pagevec.h>
3762306a36Sopenharmony_ci#include <linux/timer.h>
3862306a36Sopenharmony_ci#include <linux/sched/rt.h>
3962306a36Sopenharmony_ci#include <linux/sched/signal.h>
4062306a36Sopenharmony_ci#include <linux/mm_inline.h>
4162306a36Sopenharmony_ci#include <trace/events/writeback.h>
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci#include "internal.h"
4462306a36Sopenharmony_ci
/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 * The value is in jiffies; max() keeps it at least 1 when HZ < 5.
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals above this many pages
 * by raising the pause time to max_pause when the interval falls below it.
 * The shift makes this the number of pages in 128KB for any PAGE_SHIFT.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 * The value is in jiffies; max() keeps it at least 1 when HZ < 5.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10
6262306a36Sopenharmony_ci
/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 * of dirtyable memory.  Zeroed whenever dirty_background_bytes is
 * successfully written through its sysctl handler.
 */
static int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.  Zeroed whenever
 * dirty_background_ratio is successfully written through its sysctl handler.
 */
static unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
static int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 * of dirtyable memory.  Zeroed when vm_dirty_bytes is changed through
 * its sysctl handler.
 */
static int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.  Zeroed when
 * vm_dirty_ratio is changed through its sysctl handler.
 */
static unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

/* The system-wide writeback domain; memcg domains are tracked separately. */
struct wb_domain global_wb_domain;
12262306a36Sopenharmony_ci
/*
 * consolidated parameters for balance_dirty_pages() and its subroutines
 *
 * @avail must be filled in by the caller before domain_dirty_limits() is
 * invoked; that function then stores its results in @thresh and @bg_thresh.
 */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;		/* domain this dtc is evaluated in */
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	/* @wb's completion counter for this dtc's domain */
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};
14362306a36Sopenharmony_ci
/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 *
 * Expressed in jiffies (3*HZ, i.e. three seconds).
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK
15262306a36Sopenharmony_ci
/*
 * Designated-initializer fragments for struct dirty_throttle_control.
 *
 * GDTC_INIT(wb):        global-domain dtc for @wb, using wb->completions.
 * GDTC_INIT_NO_WB:      global-domain dtc that is not tied to any wb.
 * MDTC_INIT(wb, gdtc):  memcg-domain dtc for @wb, using
 *                       wb->memcg_completions and linked to @gdtc.
 */
#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_cistatic bool mdtc_valid(struct dirty_throttle_control *dtc)
16562306a36Sopenharmony_ci{
16662306a36Sopenharmony_ci	return dtc->dom;
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_cistatic struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	return dtc->dom;
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_cistatic struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	return mdtc->gdtc;
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cistatic struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	return &wb->memcg_completions;
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic void wb_min_max_ratio(struct bdi_writeback *wb,
18562306a36Sopenharmony_ci			     unsigned long *minp, unsigned long *maxp)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
18862306a36Sopenharmony_ci	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
18962306a36Sopenharmony_ci	unsigned long long min = wb->bdi->min_ratio;
19062306a36Sopenharmony_ci	unsigned long long max = wb->bdi->max_ratio;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	/*
19362306a36Sopenharmony_ci	 * @wb may already be clean by the time control reaches here and
19462306a36Sopenharmony_ci	 * the total may not include its bw.
19562306a36Sopenharmony_ci	 */
19662306a36Sopenharmony_ci	if (this_bw < tot_bw) {
19762306a36Sopenharmony_ci		if (min) {
19862306a36Sopenharmony_ci			min *= this_bw;
19962306a36Sopenharmony_ci			min = div64_ul(min, tot_bw);
20062306a36Sopenharmony_ci		}
20162306a36Sopenharmony_ci		if (max < 100 * BDI_RATIO_SCALE) {
20262306a36Sopenharmony_ci			max *= this_bw;
20362306a36Sopenharmony_ci			max = div64_ul(max, tot_bw);
20462306a36Sopenharmony_ci		}
20562306a36Sopenharmony_ci	}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	*minp = min;
20862306a36Sopenharmony_ci	*maxp = max;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci#else	/* CONFIG_CGROUP_WRITEBACK */
21262306a36Sopenharmony_ci
/*
 * Without CONFIG_CGROUP_WRITEBACK the dtc has no ->dom / ->gdtc members, so
 * GDTC_INIT() only sets the wb fields and the other two expand to nothing.
 */
#define GDTC_INIT(__wb)		.wb = (__wb),                           \
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_cistatic bool mdtc_valid(struct dirty_throttle_control *dtc)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	return false;
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cistatic struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	return &global_wb_domain;
22662306a36Sopenharmony_ci}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_cistatic struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
22962306a36Sopenharmony_ci{
23062306a36Sopenharmony_ci	return NULL;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	return NULL;
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_cistatic void wb_min_max_ratio(struct bdi_writeback *wb,
23962306a36Sopenharmony_ci			     unsigned long *minp, unsigned long *maxp)
24062306a36Sopenharmony_ci{
24162306a36Sopenharmony_ci	*minp = wb->bdi->min_ratio;
24262306a36Sopenharmony_ci	*maxp = wb->bdi->max_ratio;
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci#endif	/* CONFIG_CGROUP_WRITEBACK */
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci/*
24862306a36Sopenharmony_ci * In a memory zone, there is a certain amount of pages we consider
24962306a36Sopenharmony_ci * available for the page cache, which is essentially the number of
25062306a36Sopenharmony_ci * free and reclaimable pages, minus some zone reserves to protect
25162306a36Sopenharmony_ci * lowmem and the ability to uphold the zone's watermarks without
25262306a36Sopenharmony_ci * requiring writeback.
25362306a36Sopenharmony_ci *
 * This number of dirtyable pages is the base value to which the
 * user-configurable dirty ratio is applied to obtain the number of
 * pages that may actually be dirtied, either per individual zone or
 * globally by using the sum of dirtyable pages over all zones.
25862306a36Sopenharmony_ci *
25962306a36Sopenharmony_ci * Because the user is allowed to specify the dirty limit globally as
26062306a36Sopenharmony_ci * absolute number of bytes, calculating the per-zone dirty limit can
26162306a36Sopenharmony_ci * require translating the configured limit into a percentage of
26262306a36Sopenharmony_ci * global dirtyable memory first.
26362306a36Sopenharmony_ci */
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci/**
26662306a36Sopenharmony_ci * node_dirtyable_memory - number of dirtyable pages in a node
26762306a36Sopenharmony_ci * @pgdat: the node
26862306a36Sopenharmony_ci *
26962306a36Sopenharmony_ci * Return: the node's number of pages potentially available for dirty
27062306a36Sopenharmony_ci * page cache.  This is the base value for the per-node dirty limits.
27162306a36Sopenharmony_ci */
27262306a36Sopenharmony_cistatic unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	unsigned long nr_pages = 0;
27562306a36Sopenharmony_ci	int z;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	for (z = 0; z < MAX_NR_ZONES; z++) {
27862306a36Sopenharmony_ci		struct zone *zone = pgdat->node_zones + z;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci		if (!populated_zone(zone))
28162306a36Sopenharmony_ci			continue;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
28462306a36Sopenharmony_ci	}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	/*
28762306a36Sopenharmony_ci	 * Pages reserved for the kernel should not be considered
28862306a36Sopenharmony_ci	 * dirtyable, to prevent a situation where reclaim has to
28962306a36Sopenharmony_ci	 * clean pages in order to balance the zones.
29062306a36Sopenharmony_ci	 */
29162306a36Sopenharmony_ci	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
29462306a36Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	return nr_pages;
29762306a36Sopenharmony_ci}
29862306a36Sopenharmony_ci
/*
 * Return the number of dirtyable pages that live in highmem zones, capped
 * at @total.  Always 0 when the kernel is built without CONFIG_HIGHMEM.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	unsigned long highmem_pages = 0;
	int nid;
	int zid;

	for_each_node_state(nid, N_HIGH_MEMORY) {
		for (zid = ZONE_NORMAL + 1; zid < MAX_NR_ZONES; zid++) {
			struct zone *zone;
			unsigned long free_pages;

			if (!is_highmem_idx(zid))
				continue;

			zone = &NODE_DATA(nid)->node_zones[zid];
			if (!populated_zone(zone))
				continue;

			free_pages = zone_page_state(zone, NR_FREE_PAGES);
			/* clamp at the high watermark to avoid underflow */
			free_pages -= min(free_pages, high_wmark_pages(zone));

			highmem_pages += free_pages +
				zone_page_state(zone, NR_ZONE_INACTIVE_FILE) +
				zone_page_state(zone, NR_ZONE_ACTIVE_FILE);
		}
	}

	/*
	 * The highmem count must never exceed the total dirtyable memory
	 * it will be subtracted from.  That should only happen in very
	 * strange VM situations, but clamp to be safe.
	 */
	return min(highmem_pages, total);
#else
	return 0;
#endif
}
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci/**
34062306a36Sopenharmony_ci * global_dirtyable_memory - number of globally dirtyable pages
34162306a36Sopenharmony_ci *
34262306a36Sopenharmony_ci * Return: the global number of pages potentially available for dirty
34362306a36Sopenharmony_ci * page cache.  This is the base value for the global dirty limits.
34462306a36Sopenharmony_ci */
34562306a36Sopenharmony_cistatic unsigned long global_dirtyable_memory(void)
34662306a36Sopenharmony_ci{
34762306a36Sopenharmony_ci	unsigned long x;
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	x = global_zone_page_state(NR_FREE_PAGES);
35062306a36Sopenharmony_ci	/*
35162306a36Sopenharmony_ci	 * Pages reserved for the kernel should not be considered
35262306a36Sopenharmony_ci	 * dirtyable, to prevent a situation where reclaim has to
35362306a36Sopenharmony_ci	 * clean pages in order to balance the zones.
35462306a36Sopenharmony_ci	 */
35562306a36Sopenharmony_ci	x -= min(x, totalreserve_pages);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	x += global_node_page_state(NR_INACTIVE_FILE);
35862306a36Sopenharmony_ci	x += global_node_page_state(NR_ACTIVE_FILE);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	if (!vm_highmem_is_dirtyable)
36162306a36Sopenharmony_ci		x -= highmem_dirtyable_memory(x);
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	return x + 1;	/* Ensure that we never return 0 */
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci/**
36762306a36Sopenharmony_ci * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
36862306a36Sopenharmony_ci * @dtc: dirty_throttle_control of interest
36962306a36Sopenharmony_ci *
37062306a36Sopenharmony_ci * Calculate @dtc->thresh and ->bg_thresh considering
37162306a36Sopenharmony_ci * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
37262306a36Sopenharmony_ci * must ensure that @dtc->avail is set before calling this function.  The
37362306a36Sopenharmony_ci * dirty limits will be lifted by 1/4 for real-time tasks.
37462306a36Sopenharmony_ci */
37562306a36Sopenharmony_cistatic void domain_dirty_limits(struct dirty_throttle_control *dtc)
37662306a36Sopenharmony_ci{
37762306a36Sopenharmony_ci	const unsigned long available_memory = dtc->avail;
37862306a36Sopenharmony_ci	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
37962306a36Sopenharmony_ci	unsigned long bytes = vm_dirty_bytes;
38062306a36Sopenharmony_ci	unsigned long bg_bytes = dirty_background_bytes;
38162306a36Sopenharmony_ci	/* convert ratios to per-PAGE_SIZE for higher precision */
38262306a36Sopenharmony_ci	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
38362306a36Sopenharmony_ci	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
38462306a36Sopenharmony_ci	unsigned long thresh;
38562306a36Sopenharmony_ci	unsigned long bg_thresh;
38662306a36Sopenharmony_ci	struct task_struct *tsk;
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	/* gdtc is !NULL iff @dtc is for memcg domain */
38962306a36Sopenharmony_ci	if (gdtc) {
39062306a36Sopenharmony_ci		unsigned long global_avail = gdtc->avail;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci		/*
39362306a36Sopenharmony_ci		 * The byte settings can't be applied directly to memcg
39462306a36Sopenharmony_ci		 * domains.  Convert them to ratios by scaling against
39562306a36Sopenharmony_ci		 * globally available memory.  As the ratios are in
39662306a36Sopenharmony_ci		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
39762306a36Sopenharmony_ci		 * number of pages.
39862306a36Sopenharmony_ci		 */
39962306a36Sopenharmony_ci		if (bytes)
40062306a36Sopenharmony_ci			ratio = min(DIV_ROUND_UP(bytes, global_avail),
40162306a36Sopenharmony_ci				    PAGE_SIZE);
40262306a36Sopenharmony_ci		if (bg_bytes)
40362306a36Sopenharmony_ci			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
40462306a36Sopenharmony_ci				       PAGE_SIZE);
40562306a36Sopenharmony_ci		bytes = bg_bytes = 0;
40662306a36Sopenharmony_ci	}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	if (bytes)
40962306a36Sopenharmony_ci		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
41062306a36Sopenharmony_ci	else
41162306a36Sopenharmony_ci		thresh = (ratio * available_memory) / PAGE_SIZE;
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci	if (bg_bytes)
41462306a36Sopenharmony_ci		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
41562306a36Sopenharmony_ci	else
41662306a36Sopenharmony_ci		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	if (bg_thresh >= thresh)
41962306a36Sopenharmony_ci		bg_thresh = thresh / 2;
42062306a36Sopenharmony_ci	tsk = current;
42162306a36Sopenharmony_ci	if (rt_task(tsk)) {
42262306a36Sopenharmony_ci		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
42362306a36Sopenharmony_ci		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci	dtc->thresh = thresh;
42662306a36Sopenharmony_ci	dtc->bg_thresh = bg_thresh;
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	/* we should eventually report the domain in the TP */
42962306a36Sopenharmony_ci	if (!gdtc)
43062306a36Sopenharmony_ci		trace_global_dirty_state(bg_thresh, thresh);
43162306a36Sopenharmony_ci}
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci/**
43462306a36Sopenharmony_ci * global_dirty_limits - background-writeback and dirty-throttling thresholds
43562306a36Sopenharmony_ci * @pbackground: out parameter for bg_thresh
43662306a36Sopenharmony_ci * @pdirty: out parameter for thresh
43762306a36Sopenharmony_ci *
43862306a36Sopenharmony_ci * Calculate bg_thresh and thresh for global_wb_domain.  See
43962306a36Sopenharmony_ci * domain_dirty_limits() for details.
44062306a36Sopenharmony_ci */
44162306a36Sopenharmony_civoid global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
44262306a36Sopenharmony_ci{
44362306a36Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	gdtc.avail = global_dirtyable_memory();
44662306a36Sopenharmony_ci	domain_dirty_limits(&gdtc);
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	*pbackground = gdtc.bg_thresh;
44962306a36Sopenharmony_ci	*pdirty = gdtc.thresh;
45062306a36Sopenharmony_ci}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci/**
45362306a36Sopenharmony_ci * node_dirty_limit - maximum number of dirty pages allowed in a node
45462306a36Sopenharmony_ci * @pgdat: the node
45562306a36Sopenharmony_ci *
45662306a36Sopenharmony_ci * Return: the maximum number of dirty pages allowed in a node, based
45762306a36Sopenharmony_ci * on the node's dirtyable memory.
45862306a36Sopenharmony_ci */
45962306a36Sopenharmony_cistatic unsigned long node_dirty_limit(struct pglist_data *pgdat)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	unsigned long node_memory = node_dirtyable_memory(pgdat);
46262306a36Sopenharmony_ci	struct task_struct *tsk = current;
46362306a36Sopenharmony_ci	unsigned long dirty;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	if (vm_dirty_bytes)
46662306a36Sopenharmony_ci		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
46762306a36Sopenharmony_ci			node_memory / global_dirtyable_memory();
46862306a36Sopenharmony_ci	else
46962306a36Sopenharmony_ci		dirty = vm_dirty_ratio * node_memory / 100;
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	if (rt_task(tsk))
47262306a36Sopenharmony_ci		dirty += dirty / 4;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	return dirty;
47562306a36Sopenharmony_ci}
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci/**
47862306a36Sopenharmony_ci * node_dirty_ok - tells whether a node is within its dirty limits
47962306a36Sopenharmony_ci * @pgdat: the node to check
48062306a36Sopenharmony_ci *
48162306a36Sopenharmony_ci * Return: %true when the dirty pages in @pgdat are within the node's
48262306a36Sopenharmony_ci * dirty limit, %false if the limit is exceeded.
48362306a36Sopenharmony_ci */
48462306a36Sopenharmony_cibool node_dirty_ok(struct pglist_data *pgdat)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	unsigned long limit = node_dirty_limit(pgdat);
48762306a36Sopenharmony_ci	unsigned long nr_pages = 0;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
49062306a36Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_WRITEBACK);
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	return nr_pages <= limit;
49362306a36Sopenharmony_ci}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
49662306a36Sopenharmony_cistatic int dirty_background_ratio_handler(struct ctl_table *table, int write,
49762306a36Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	int ret;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
50262306a36Sopenharmony_ci	if (ret == 0 && write)
50362306a36Sopenharmony_ci		dirty_background_bytes = 0;
50462306a36Sopenharmony_ci	return ret;
50562306a36Sopenharmony_ci}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_cistatic int dirty_background_bytes_handler(struct ctl_table *table, int write,
50862306a36Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
50962306a36Sopenharmony_ci{
51062306a36Sopenharmony_ci	int ret;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
51362306a36Sopenharmony_ci	if (ret == 0 && write)
51462306a36Sopenharmony_ci		dirty_background_ratio = 0;
51562306a36Sopenharmony_ci	return ret;
51662306a36Sopenharmony_ci}
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_cistatic int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
51962306a36Sopenharmony_ci		size_t *lenp, loff_t *ppos)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	int old_ratio = vm_dirty_ratio;
52262306a36Sopenharmony_ci	int ret;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
52562306a36Sopenharmony_ci	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
52662306a36Sopenharmony_ci		writeback_set_ratelimit();
52762306a36Sopenharmony_ci		vm_dirty_bytes = 0;
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci	return ret;
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_cistatic int dirty_bytes_handler(struct ctl_table *table, int write,
53362306a36Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
53462306a36Sopenharmony_ci{
53562306a36Sopenharmony_ci	unsigned long old_bytes = vm_dirty_bytes;
53662306a36Sopenharmony_ci	int ret;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
53962306a36Sopenharmony_ci	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
54062306a36Sopenharmony_ci		writeback_set_ratelimit();
54162306a36Sopenharmony_ci		vm_dirty_ratio = 0;
54262306a36Sopenharmony_ci	}
54362306a36Sopenharmony_ci	return ret;
54462306a36Sopenharmony_ci}
54562306a36Sopenharmony_ci#endif
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_cistatic unsigned long wp_next_time(unsigned long cur_time)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	cur_time += VM_COMPLETIONS_PERIOD_LEN;
55062306a36Sopenharmony_ci	/* 0 has a special meaning... */
55162306a36Sopenharmony_ci	if (!cur_time)
55262306a36Sopenharmony_ci		return 1;
55362306a36Sopenharmony_ci	return cur_time;
55462306a36Sopenharmony_ci}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_cistatic void wb_domain_writeout_add(struct wb_domain *dom,
55762306a36Sopenharmony_ci				   struct fprop_local_percpu *completions,
55862306a36Sopenharmony_ci				   unsigned int max_prop_frac, long nr)
55962306a36Sopenharmony_ci{
56062306a36Sopenharmony_ci	__fprop_add_percpu_max(&dom->completions, completions,
56162306a36Sopenharmony_ci			       max_prop_frac, nr);
56262306a36Sopenharmony_ci	/* First event after period switching was turned off? */
56362306a36Sopenharmony_ci	if (unlikely(!dom->period_time)) {
56462306a36Sopenharmony_ci		/*
56562306a36Sopenharmony_ci		 * We can race with other __bdi_writeout_inc calls here but
56662306a36Sopenharmony_ci		 * it does not cause any harm since the resulting time when
56762306a36Sopenharmony_ci		 * timer will fire and what is in writeout_period_time will be
56862306a36Sopenharmony_ci		 * roughly the same.
56962306a36Sopenharmony_ci		 */
57062306a36Sopenharmony_ci		dom->period_time = wp_next_time(jiffies);
57162306a36Sopenharmony_ci		mod_timer(&dom->period_timer, dom->period_time);
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci}
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci/*
57662306a36Sopenharmony_ci * Increment @wb's writeout completion count and the global writeout
57762306a36Sopenharmony_ci * completion count. Called from __folio_end_writeback().
57862306a36Sopenharmony_ci */
57962306a36Sopenharmony_cistatic inline void __wb_writeout_add(struct bdi_writeback *wb, long nr)
58062306a36Sopenharmony_ci{
58162306a36Sopenharmony_ci	struct wb_domain *cgdom;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	wb_stat_mod(wb, WB_WRITTEN, nr);
58462306a36Sopenharmony_ci	wb_domain_writeout_add(&global_wb_domain, &wb->completions,
58562306a36Sopenharmony_ci			       wb->bdi->max_prop_frac, nr);
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	cgdom = mem_cgroup_wb_domain(wb);
58862306a36Sopenharmony_ci	if (cgdom)
58962306a36Sopenharmony_ci		wb_domain_writeout_add(cgdom, wb_memcg_completions(wb),
59062306a36Sopenharmony_ci				       wb->bdi->max_prop_frac, nr);
59162306a36Sopenharmony_ci}
59262306a36Sopenharmony_ci
/*
 * Account a single writeout completion for @wb.  Local interrupts are
 * disabled around the update and restored to their previous state after.
 */
void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__wb_writeout_add(wb, 1);
	local_irq_restore(irq_flags);
}
EXPORT_SYMBOL_GPL(wb_writeout_inc);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci/*
60462306a36Sopenharmony_ci * On idle system, we can be called long after we scheduled because we use
60562306a36Sopenharmony_ci * deferred timers so count with missed periods.
60662306a36Sopenharmony_ci */
60762306a36Sopenharmony_cistatic void writeout_period(struct timer_list *t)
60862306a36Sopenharmony_ci{
60962306a36Sopenharmony_ci	struct wb_domain *dom = from_timer(dom, t, period_timer);
61062306a36Sopenharmony_ci	int miss_periods = (jiffies - dom->period_time) /
61162306a36Sopenharmony_ci						 VM_COMPLETIONS_PERIOD_LEN;
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
61462306a36Sopenharmony_ci		dom->period_time = wp_next_time(dom->period_time +
61562306a36Sopenharmony_ci				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
61662306a36Sopenharmony_ci		mod_timer(&dom->period_timer, dom->period_time);
61762306a36Sopenharmony_ci	} else {
61862306a36Sopenharmony_ci		/*
61962306a36Sopenharmony_ci		 * Aging has zeroed all fractions. Stop wasting CPU on period
62062306a36Sopenharmony_ci		 * updates.
62162306a36Sopenharmony_ci		 */
62262306a36Sopenharmony_ci		dom->period_time = 0;
62362306a36Sopenharmony_ci	}
62462306a36Sopenharmony_ci}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ciint wb_domain_init(struct wb_domain *dom, gfp_t gfp)
62762306a36Sopenharmony_ci{
62862306a36Sopenharmony_ci	memset(dom, 0, sizeof(*dom));
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci	spin_lock_init(&dom->lock);
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	dom->dirty_limit_tstamp = jiffies;
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	return fprop_global_init(&dom->completions, gfp);
63762306a36Sopenharmony_ci}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK
64062306a36Sopenharmony_civoid wb_domain_exit(struct wb_domain *dom)
64162306a36Sopenharmony_ci{
64262306a36Sopenharmony_ci	del_timer_sync(&dom->period_timer);
64362306a36Sopenharmony_ci	fprop_global_destroy(&dom->completions);
64462306a36Sopenharmony_ci}
64562306a36Sopenharmony_ci#endif
64662306a36Sopenharmony_ci
/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices, which, for obvious reasons, can not
 * exceed 100%.
 *
 * The sum is kept in percent multiplied by BDI_RATIO_SCALE, i.e. 100% is
 * 100 * BDI_RATIO_SCALE.  It is modified by __bdi_set_min_ratio() while
 * holding bdi_lock.
 */
static unsigned int bdi_min_ratio;
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_cistatic int bdi_check_pages_limit(unsigned long pages)
65562306a36Sopenharmony_ci{
65662306a36Sopenharmony_ci	unsigned long max_dirty_pages = global_dirtyable_memory();
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci	if (pages > max_dirty_pages)
65962306a36Sopenharmony_ci		return -EINVAL;
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	return 0;
66262306a36Sopenharmony_ci}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_cistatic unsigned long bdi_ratio_from_pages(unsigned long pages)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	unsigned long background_thresh;
66762306a36Sopenharmony_ci	unsigned long dirty_thresh;
66862306a36Sopenharmony_ci	unsigned long ratio;
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	global_dirty_limits(&background_thresh, &dirty_thresh);
67162306a36Sopenharmony_ci	ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh);
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	return ratio;
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_cistatic u64 bdi_get_bytes(unsigned int ratio)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	unsigned long background_thresh;
67962306a36Sopenharmony_ci	unsigned long dirty_thresh;
68062306a36Sopenharmony_ci	u64 bytes;
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	global_dirty_limits(&background_thresh, &dirty_thresh);
68362306a36Sopenharmony_ci	bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	return bytes;
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_cistatic int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
68962306a36Sopenharmony_ci{
69062306a36Sopenharmony_ci	unsigned int delta;
69162306a36Sopenharmony_ci	int ret = 0;
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	if (min_ratio > 100 * BDI_RATIO_SCALE)
69462306a36Sopenharmony_ci		return -EINVAL;
69562306a36Sopenharmony_ci	min_ratio *= BDI_RATIO_SCALE;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	spin_lock_bh(&bdi_lock);
69862306a36Sopenharmony_ci	if (min_ratio > bdi->max_ratio) {
69962306a36Sopenharmony_ci		ret = -EINVAL;
70062306a36Sopenharmony_ci	} else {
70162306a36Sopenharmony_ci		if (min_ratio < bdi->min_ratio) {
70262306a36Sopenharmony_ci			delta = bdi->min_ratio - min_ratio;
70362306a36Sopenharmony_ci			bdi_min_ratio -= delta;
70462306a36Sopenharmony_ci			bdi->min_ratio = min_ratio;
70562306a36Sopenharmony_ci		} else {
70662306a36Sopenharmony_ci			delta = min_ratio - bdi->min_ratio;
70762306a36Sopenharmony_ci			if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) {
70862306a36Sopenharmony_ci				bdi_min_ratio += delta;
70962306a36Sopenharmony_ci				bdi->min_ratio = min_ratio;
71062306a36Sopenharmony_ci			} else {
71162306a36Sopenharmony_ci				ret = -EINVAL;
71262306a36Sopenharmony_ci			}
71362306a36Sopenharmony_ci		}
71462306a36Sopenharmony_ci	}
71562306a36Sopenharmony_ci	spin_unlock_bh(&bdi_lock);
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	return ret;
71862306a36Sopenharmony_ci}
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_cistatic int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
72162306a36Sopenharmony_ci{
72262306a36Sopenharmony_ci	int ret = 0;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	if (max_ratio > 100 * BDI_RATIO_SCALE)
72562306a36Sopenharmony_ci		return -EINVAL;
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	spin_lock_bh(&bdi_lock);
72862306a36Sopenharmony_ci	if (bdi->min_ratio > max_ratio) {
72962306a36Sopenharmony_ci		ret = -EINVAL;
73062306a36Sopenharmony_ci	} else {
73162306a36Sopenharmony_ci		bdi->max_ratio = max_ratio;
73262306a36Sopenharmony_ci		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
73362306a36Sopenharmony_ci	}
73462306a36Sopenharmony_ci	spin_unlock_bh(&bdi_lock);
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	return ret;
73762306a36Sopenharmony_ci}
73862306a36Sopenharmony_ci
/*
 * Set @bdi's minimum dirty share from a value that is already expressed
 * in percent * BDI_RATIO_SCALE; it is passed on unchanged.
 *
 * Returns the result of __bdi_set_min_ratio().
 */
int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int err = __bdi_set_min_ratio(bdi, min_ratio);

	return err;
}
74362306a36Sopenharmony_ci
/*
 * Set @bdi's maximum dirty share from a value that is already expressed
 * in percent * BDI_RATIO_SCALE; it is passed on unchanged.
 *
 * Returns the result of __bdi_set_max_ratio().
 */
int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio)
{
	int err = __bdi_set_max_ratio(bdi, max_ratio);

	return err;
}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ciint bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
75062306a36Sopenharmony_ci{
75162306a36Sopenharmony_ci	return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE);
75262306a36Sopenharmony_ci}
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ciint bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
75562306a36Sopenharmony_ci{
75662306a36Sopenharmony_ci	return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE);
75762306a36Sopenharmony_ci}
75862306a36Sopenharmony_ciEXPORT_SYMBOL(bdi_set_max_ratio);
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ciu64 bdi_get_min_bytes(struct backing_dev_info *bdi)
76162306a36Sopenharmony_ci{
76262306a36Sopenharmony_ci	return bdi_get_bytes(bdi->min_ratio);
76362306a36Sopenharmony_ci}
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ciint bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes)
76662306a36Sopenharmony_ci{
76762306a36Sopenharmony_ci	int ret;
76862306a36Sopenharmony_ci	unsigned long pages = min_bytes >> PAGE_SHIFT;
76962306a36Sopenharmony_ci	unsigned long min_ratio;
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	ret = bdi_check_pages_limit(pages);
77262306a36Sopenharmony_ci	if (ret)
77362306a36Sopenharmony_ci		return ret;
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	min_ratio = bdi_ratio_from_pages(pages);
77662306a36Sopenharmony_ci	return __bdi_set_min_ratio(bdi, min_ratio);
77762306a36Sopenharmony_ci}
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ciu64 bdi_get_max_bytes(struct backing_dev_info *bdi)
78062306a36Sopenharmony_ci{
78162306a36Sopenharmony_ci	return bdi_get_bytes(bdi->max_ratio);
78262306a36Sopenharmony_ci}
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ciint bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes)
78562306a36Sopenharmony_ci{
78662306a36Sopenharmony_ci	int ret;
78762306a36Sopenharmony_ci	unsigned long pages = max_bytes >> PAGE_SHIFT;
78862306a36Sopenharmony_ci	unsigned long max_ratio;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	ret = bdi_check_pages_limit(pages);
79162306a36Sopenharmony_ci	if (ret)
79262306a36Sopenharmony_ci		return ret;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	max_ratio = bdi_ratio_from_pages(pages);
79562306a36Sopenharmony_ci	return __bdi_set_max_ratio(bdi, max_ratio);
79662306a36Sopenharmony_ci}
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ciint bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit)
79962306a36Sopenharmony_ci{
80062306a36Sopenharmony_ci	if (strict_limit > 1)
80162306a36Sopenharmony_ci		return -EINVAL;
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	spin_lock_bh(&bdi_lock);
80462306a36Sopenharmony_ci	if (strict_limit)
80562306a36Sopenharmony_ci		bdi->capabilities |= BDI_CAP_STRICTLIMIT;
80662306a36Sopenharmony_ci	else
80762306a36Sopenharmony_ci		bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
80862306a36Sopenharmony_ci	spin_unlock_bh(&bdi_lock);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	return 0;
81162306a36Sopenharmony_ci}
81262306a36Sopenharmony_ci
/*
 * Upper end of the "freerun" range: the midpoint between the background
 * threshold and the throttling threshold, rounded down.
 */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	unsigned long sum = thresh + bg_thresh;

	return sum >> 1;
}
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_cistatic unsigned long hard_dirty_limit(struct wb_domain *dom,
82062306a36Sopenharmony_ci				      unsigned long thresh)
82162306a36Sopenharmony_ci{
82262306a36Sopenharmony_ci	return max(thresh, dom->dirty_limit);
82362306a36Sopenharmony_ci}
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci/*
82662306a36Sopenharmony_ci * Memory which can be further allocated to a memcg domain is capped by
82762306a36Sopenharmony_ci * system-wide clean memory excluding the amount being used in the domain.
82862306a36Sopenharmony_ci */
82962306a36Sopenharmony_cistatic void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
83062306a36Sopenharmony_ci			    unsigned long filepages, unsigned long headroom)
83162306a36Sopenharmony_ci{
83262306a36Sopenharmony_ci	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
83362306a36Sopenharmony_ci	unsigned long clean = filepages - min(filepages, mdtc->dirty);
83462306a36Sopenharmony_ci	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
83562306a36Sopenharmony_ci	unsigned long other_clean = global_clean - min(global_clean, clean);
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci	mdtc->avail = filepages + min(headroom, other_clean);
83862306a36Sopenharmony_ci}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci/**
84162306a36Sopenharmony_ci * __wb_calc_thresh - @wb's share of dirty throttling threshold
84262306a36Sopenharmony_ci * @dtc: dirty_throttle_context of interest
84362306a36Sopenharmony_ci *
84462306a36Sopenharmony_ci * Note that balance_dirty_pages() will only seriously take it as a hard limit
84562306a36Sopenharmony_ci * when sleeping max_pause per page is not enough to keep the dirty pages under
84662306a36Sopenharmony_ci * control. For example, when the device is completely stalled due to some error
84762306a36Sopenharmony_ci * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
84862306a36Sopenharmony_ci * In the other normal situations, it acts more gently by throttling the tasks
84962306a36Sopenharmony_ci * more (rather than completely block them) when the wb dirty pages go high.
85062306a36Sopenharmony_ci *
85162306a36Sopenharmony_ci * It allocates high/low dirty limits to fast/slow devices, in order to prevent
85262306a36Sopenharmony_ci * - starving fast devices
85362306a36Sopenharmony_ci * - piling up dirty pages (that will take long time to sync) on slow devices
85462306a36Sopenharmony_ci *
85562306a36Sopenharmony_ci * The wb's share of dirty limit will be adapting to its throughput and
85662306a36Sopenharmony_ci * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
85762306a36Sopenharmony_ci *
85862306a36Sopenharmony_ci * Return: @wb's dirty limit in pages. The term "dirty" in the context of
85962306a36Sopenharmony_ci * dirty balancing includes all PG_dirty and PG_writeback pages.
86062306a36Sopenharmony_ci */
86162306a36Sopenharmony_cistatic unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
86262306a36Sopenharmony_ci{
86362306a36Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
86462306a36Sopenharmony_ci	unsigned long thresh = dtc->thresh;
86562306a36Sopenharmony_ci	u64 wb_thresh;
86662306a36Sopenharmony_ci	unsigned long numerator, denominator;
86762306a36Sopenharmony_ci	unsigned long wb_min_ratio, wb_max_ratio;
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	/*
87062306a36Sopenharmony_ci	 * Calculate this BDI's share of the thresh ratio.
87162306a36Sopenharmony_ci	 */
87262306a36Sopenharmony_ci	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
87362306a36Sopenharmony_ci			      &numerator, &denominator);
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE);
87662306a36Sopenharmony_ci	wb_thresh *= numerator;
87762306a36Sopenharmony_ci	wb_thresh = div64_ul(wb_thresh, denominator);
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
88262306a36Sopenharmony_ci	if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
88362306a36Sopenharmony_ci		wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	return wb_thresh;
88662306a36Sopenharmony_ci}
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ciunsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
88962306a36Sopenharmony_ci{
89062306a36Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
89162306a36Sopenharmony_ci					       .thresh = thresh };
89262306a36Sopenharmony_ci	return __wb_calc_thresh(&gdtc);
89362306a36Sopenharmony_ci}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci/*
89662306a36Sopenharmony_ci *                           setpoint - dirty 3
89762306a36Sopenharmony_ci *        f(dirty) := 1.0 + (----------------)
89862306a36Sopenharmony_ci *                           limit - setpoint
89962306a36Sopenharmony_ci *
90062306a36Sopenharmony_ci * it's a 3rd order polynomial that subjects to
90162306a36Sopenharmony_ci *
90262306a36Sopenharmony_ci * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
90362306a36Sopenharmony_ci * (2) f(setpoint) = 1.0 => the balance point
90462306a36Sopenharmony_ci * (3) f(limit)    = 0   => the hard limit
90562306a36Sopenharmony_ci * (4) df/dx      <= 0	 => negative feedback control
90662306a36Sopenharmony_ci * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
90762306a36Sopenharmony_ci *     => fast response on large errors; small oscillation near setpoint
90862306a36Sopenharmony_ci */
90962306a36Sopenharmony_cistatic long long pos_ratio_polynom(unsigned long setpoint,
91062306a36Sopenharmony_ci					  unsigned long dirty,
91162306a36Sopenharmony_ci					  unsigned long limit)
91262306a36Sopenharmony_ci{
91362306a36Sopenharmony_ci	long long pos_ratio;
91462306a36Sopenharmony_ci	long x;
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
91762306a36Sopenharmony_ci		      (limit - setpoint) | 1);
91862306a36Sopenharmony_ci	pos_ratio = x;
91962306a36Sopenharmony_ci	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
92062306a36Sopenharmony_ci	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
92162306a36Sopenharmony_ci	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
92462306a36Sopenharmony_ci}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci/*
92762306a36Sopenharmony_ci * Dirty position control.
92862306a36Sopenharmony_ci *
92962306a36Sopenharmony_ci * (o) global/bdi setpoints
93062306a36Sopenharmony_ci *
 * We want the dirty pages to be balanced around the global/wb setpoints.
93262306a36Sopenharmony_ci * When the number of dirty pages is higher/lower than the setpoint, the
93362306a36Sopenharmony_ci * dirty position control ratio (and hence task dirty ratelimit) will be
93462306a36Sopenharmony_ci * decreased/increased to bring the dirty pages back to the setpoint.
93562306a36Sopenharmony_ci *
93662306a36Sopenharmony_ci *     pos_ratio = 1 << RATELIMIT_CALC_SHIFT
93762306a36Sopenharmony_ci *
93862306a36Sopenharmony_ci *     if (dirty < setpoint) scale up   pos_ratio
93962306a36Sopenharmony_ci *     if (dirty > setpoint) scale down pos_ratio
94062306a36Sopenharmony_ci *
94162306a36Sopenharmony_ci *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
94262306a36Sopenharmony_ci *     if (wb_dirty > wb_setpoint) scale down pos_ratio
94362306a36Sopenharmony_ci *
94462306a36Sopenharmony_ci *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
94562306a36Sopenharmony_ci *
94662306a36Sopenharmony_ci * (o) global control line
94762306a36Sopenharmony_ci *
94862306a36Sopenharmony_ci *     ^ pos_ratio
94962306a36Sopenharmony_ci *     |
95062306a36Sopenharmony_ci *     |            |<===== global dirty control scope ======>|
95162306a36Sopenharmony_ci * 2.0  * * * * * * *
95262306a36Sopenharmony_ci *     |            .*
95362306a36Sopenharmony_ci *     |            . *
95462306a36Sopenharmony_ci *     |            .   *
95562306a36Sopenharmony_ci *     |            .     *
95662306a36Sopenharmony_ci *     |            .        *
95762306a36Sopenharmony_ci *     |            .            *
95862306a36Sopenharmony_ci * 1.0 ................................*
95962306a36Sopenharmony_ci *     |            .                  .     *
96062306a36Sopenharmony_ci *     |            .                  .          *
96162306a36Sopenharmony_ci *     |            .                  .              *
96262306a36Sopenharmony_ci *     |            .                  .                 *
96362306a36Sopenharmony_ci *     |            .                  .                    *
96462306a36Sopenharmony_ci *   0 +------------.------------------.----------------------*------------->
96562306a36Sopenharmony_ci *           freerun^          setpoint^                 limit^   dirty pages
96662306a36Sopenharmony_ci *
96762306a36Sopenharmony_ci * (o) wb control line
96862306a36Sopenharmony_ci *
96962306a36Sopenharmony_ci *     ^ pos_ratio
97062306a36Sopenharmony_ci *     |
97162306a36Sopenharmony_ci *     |            *
97262306a36Sopenharmony_ci *     |              *
97362306a36Sopenharmony_ci *     |                *
97462306a36Sopenharmony_ci *     |                  *
97562306a36Sopenharmony_ci *     |                    * |<=========== span ============>|
97662306a36Sopenharmony_ci * 1.0 .......................*
97762306a36Sopenharmony_ci *     |                      . *
97862306a36Sopenharmony_ci *     |                      .   *
97962306a36Sopenharmony_ci *     |                      .     *
98062306a36Sopenharmony_ci *     |                      .       *
98162306a36Sopenharmony_ci *     |                      .         *
98262306a36Sopenharmony_ci *     |                      .           *
98362306a36Sopenharmony_ci *     |                      .             *
98462306a36Sopenharmony_ci *     |                      .               *
98562306a36Sopenharmony_ci *     |                      .                 *
98662306a36Sopenharmony_ci *     |                      .                   *
98762306a36Sopenharmony_ci *     |                      .                     *
98862306a36Sopenharmony_ci * 1/4 ...............................................* * * * * * * * * * * *
98962306a36Sopenharmony_ci *     |                      .                         .
99062306a36Sopenharmony_ci *     |                      .                           .
99162306a36Sopenharmony_ci *     |                      .                             .
99262306a36Sopenharmony_ci *   0 +----------------------.-------------------------------.------------->
99362306a36Sopenharmony_ci *                wb_setpoint^                    x_intercept^
99462306a36Sopenharmony_ci *
99562306a36Sopenharmony_ci * The wb control line won't drop below pos_ratio=1/4, so that wb_dirty can
99662306a36Sopenharmony_ci * be smoothly throttled down to normal if it starts high in situations like
99762306a36Sopenharmony_ci * - start writing to a slow SD card and a fast disk at the same time. The SD
99862306a36Sopenharmony_ci *   card's wb_dirty may rush to many times higher than wb_setpoint.
99962306a36Sopenharmony_ci * - the wb dirty thresh drops quickly due to change of JBOD workload
100062306a36Sopenharmony_ci */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	/*
	 * The result is handed back in dtc->pos_ratio. Start from 0 so that
	 * the bare returns below report a position ratio of 0.
	 */
	dtc->pos_ratio = 0;

	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits. Even if global "nr_dirty" is under "freerun".
	 * This is especially important for fuse which sets bdi->max_ratio to
	 * 1% by default. Without strictlimit feature, fuse writeback may
	 * consume arbitrary amount of RAM because it is accounted in
	 * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
	 *
	 * Here, in wb_position_ratio(), we calculate pos_ratio based on
	 * two values: wb_dirty and wb_thresh. Let's consider an example:
	 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
	 * limits are set by default to 10% and 20% (background and throttle).
	 * Then wb_thresh is 1% of 20% of 16GB. This amounts to ~8K pages.
	 * wb_calc_thresh(wb, bg_thresh) is about ~4K pages. wb_setpoint is
	 * about ~6K pages (as the average of background and throttle wb
	 * limits). The 3rd order polynomial will provide positive feedback if
	 * wb_dirty is under wb_setpoint and vice versa.
	 *
	 * Note, that we cannot use global counters in these calculations
	 * because we want to throttle process writing to a strictlimit wb
	 * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB
	 * in the example above).
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		/*
		 * With fewer than 8 dirty pages on this wb, skip the wb
		 * polynomial and use twice the global ratio, capped at 2.0.
		 */
		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					   2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		/* At or over the wb threshold: pos_ratio stays 0. */
		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		/* Degenerate setpoint: pos_ratio stays 0. */
		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio. In other words global
		 * state ("dirty") is not limiting factor and we have to
		 * make decision based on wb counters. But there is an
		 * important case when global pos_ratio should get precedence:
		 * global limits are exceeded (e.g. due to activities on other
		 * wb's) while given strictlimit wb is below limit.
		 *
		 * "pos_ratio * wb_pos_ratio" would work for the case above,
		 * but it would look too non-natural for the case of all
		 * activity in the system coming from a single strictlimit wb
		 * with bdi->max_ratio == 100%.
		 *
		 * Note that min() below somewhat changes the dynamics of the
		 * control system. Normally, pos_ratio value can be well over 3
		 * (when globally we are at freerun and wb is well below wb
		 * setpoint). Now the maximum pos_ratio in the same situation
		 * is 2. We might want to tweak this if we observe the control
		 * system is too slow to adapt.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed basic pos_ratio above based on global situation. If
	 * the wb is over/under its share of dirty pages, we want to scale
	 * pos_ratio further down/up. That is done by the following mechanism.
	 */

	/*
	 * wb setpoint
	 *
	 *        f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint)
	 *
	 *                        x_intercept - wb_dirty
	 *                     := --------------------------
	 *                        x_intercept - wb_setpoint
	 *
	 * The main wb control line is a linear function that subjects to
	 *
	 * (1) f(wb_setpoint) = 1.0
	 * (2) k = - 1 / (8 * write_bw)  (in single wb case)
	 *     or equally: x_intercept = wb_setpoint + 8 * write_bw
	 *
	 * For single wb case, the dirty pages are observed to fluctuate
	 * regularly within range
	 *        [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2]
	 * for various filesystems, where (2) can yield a reasonable 12.5%
	 * fluctuation range for pos_ratio.
	 *
	 * For JBOD case, wb_thresh (not wb_dirty!) could fluctuate up to its
	 * own size, so move the slope over accordingly and choose a slope that
	 * yields 100% pos_ratio fluctuation on suddenly doubled wb_thresh.
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and active
	 * writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 *
	 * x holds wb_thresh / thresh in fixed point with 16 fractional bits;
	 * the "| 1" keeps the divisor non-zero.
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *         thresh                           thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	/*
	 * Follow the linear control line up to wb_setpoint + 3/4 * span,
	 * where it has dropped to 1/4; from there on hold the factor at 1/4.
	 * The "| 1" keeps the divisor non-zero.
	 */
	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 *
	 * Below wb_thresh / 2 the ratio is boosted by x_intercept / wb_dirty;
	 * once wb_dirty is at or below 1/8 of x_intercept the boost is pinned
	 * at 8, which also keeps wb_dirty == 0 out of the division.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_cistatic void wb_update_write_bandwidth(struct bdi_writeback *wb,
118162306a36Sopenharmony_ci				      unsigned long elapsed,
118262306a36Sopenharmony_ci				      unsigned long written)
118362306a36Sopenharmony_ci{
118462306a36Sopenharmony_ci	const unsigned long period = roundup_pow_of_two(3 * HZ);
118562306a36Sopenharmony_ci	unsigned long avg = wb->avg_write_bandwidth;
118662306a36Sopenharmony_ci	unsigned long old = wb->write_bandwidth;
118762306a36Sopenharmony_ci	u64 bw;
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci	/*
119062306a36Sopenharmony_ci	 * bw = written * HZ / elapsed
119162306a36Sopenharmony_ci	 *
119262306a36Sopenharmony_ci	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
119362306a36Sopenharmony_ci	 * write_bandwidth = ---------------------------------------------------
119462306a36Sopenharmony_ci	 *                                          period
119562306a36Sopenharmony_ci	 *
119662306a36Sopenharmony_ci	 * @written may have decreased due to folio_redirty_for_writepage().
119762306a36Sopenharmony_ci	 * Avoid underflowing @bw calculation.
119862306a36Sopenharmony_ci	 */
119962306a36Sopenharmony_ci	bw = written - min(written, wb->written_stamp);
120062306a36Sopenharmony_ci	bw *= HZ;
120162306a36Sopenharmony_ci	if (unlikely(elapsed > period)) {
120262306a36Sopenharmony_ci		bw = div64_ul(bw, elapsed);
120362306a36Sopenharmony_ci		avg = bw;
120462306a36Sopenharmony_ci		goto out;
120562306a36Sopenharmony_ci	}
120662306a36Sopenharmony_ci	bw += (u64)wb->write_bandwidth * (period - elapsed);
120762306a36Sopenharmony_ci	bw >>= ilog2(period);
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	/*
121062306a36Sopenharmony_ci	 * one more level of smoothing, for filtering out sudden spikes
121162306a36Sopenharmony_ci	 */
121262306a36Sopenharmony_ci	if (avg > old && old >= (unsigned long)bw)
121362306a36Sopenharmony_ci		avg -= (avg - old) >> 3;
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	if (avg < old && old <= (unsigned long)bw)
121662306a36Sopenharmony_ci		avg += (old - avg) >> 3;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ciout:
121962306a36Sopenharmony_ci	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
122062306a36Sopenharmony_ci	avg = max(avg, 1LU);
122162306a36Sopenharmony_ci	if (wb_has_dirty_io(wb)) {
122262306a36Sopenharmony_ci		long delta = avg - wb->avg_write_bandwidth;
122362306a36Sopenharmony_ci		WARN_ON_ONCE(atomic_long_add_return(delta,
122462306a36Sopenharmony_ci					&wb->bdi->tot_write_bandwidth) <= 0);
122562306a36Sopenharmony_ci	}
122662306a36Sopenharmony_ci	wb->write_bandwidth = bw;
122762306a36Sopenharmony_ci	WRITE_ONCE(wb->avg_write_bandwidth, avg);
122862306a36Sopenharmony_ci}
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_cistatic void update_dirty_limit(struct dirty_throttle_control *dtc)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
123362306a36Sopenharmony_ci	unsigned long thresh = dtc->thresh;
123462306a36Sopenharmony_ci	unsigned long limit = dom->dirty_limit;
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci	/*
123762306a36Sopenharmony_ci	 * Follow up in one step.
123862306a36Sopenharmony_ci	 */
123962306a36Sopenharmony_ci	if (limit < thresh) {
124062306a36Sopenharmony_ci		limit = thresh;
124162306a36Sopenharmony_ci		goto update;
124262306a36Sopenharmony_ci	}
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci	/*
124562306a36Sopenharmony_ci	 * Follow down slowly. Use the higher one as the target, because thresh
124662306a36Sopenharmony_ci	 * may drop below dirty. This is exactly the reason to introduce
124762306a36Sopenharmony_ci	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
124862306a36Sopenharmony_ci	 */
124962306a36Sopenharmony_ci	thresh = max(thresh, dtc->dirty);
125062306a36Sopenharmony_ci	if (limit > thresh) {
125162306a36Sopenharmony_ci		limit -= (limit - thresh) >> 5;
125262306a36Sopenharmony_ci		goto update;
125362306a36Sopenharmony_ci	}
125462306a36Sopenharmony_ci	return;
125562306a36Sopenharmony_ciupdate:
125662306a36Sopenharmony_ci	dom->dirty_limit = limit;
125762306a36Sopenharmony_ci}
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_cistatic void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
126062306a36Sopenharmony_ci				      unsigned long now)
126162306a36Sopenharmony_ci{
126262306a36Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_ci	/*
126562306a36Sopenharmony_ci	 * check locklessly first to optimize away locking for the most time
126662306a36Sopenharmony_ci	 */
126762306a36Sopenharmony_ci	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
126862306a36Sopenharmony_ci		return;
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	spin_lock(&dom->lock);
127162306a36Sopenharmony_ci	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
127262306a36Sopenharmony_ci		update_dirty_limit(dtc);
127362306a36Sopenharmony_ci		dom->dirty_limit_tstamp = now;
127462306a36Sopenharmony_ci	}
127562306a36Sopenharmony_ci	spin_unlock(&dom->lock);
127662306a36Sopenharmony_ci}
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci/*
127962306a36Sopenharmony_ci * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
128062306a36Sopenharmony_ci *
128162306a36Sopenharmony_ci * Normal wb tasks will be curbed at or below it in long term.
128262306a36Sopenharmony_ci * Obviously it should be around (write_bw / N) when there are N dd tasks.
128362306a36Sopenharmony_ci */
128462306a36Sopenharmony_cistatic void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
128562306a36Sopenharmony_ci				      unsigned long dirtied,
128662306a36Sopenharmony_ci				      unsigned long elapsed)
128762306a36Sopenharmony_ci{
128862306a36Sopenharmony_ci	struct bdi_writeback *wb = dtc->wb;
128962306a36Sopenharmony_ci	unsigned long dirty = dtc->dirty;
129062306a36Sopenharmony_ci	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
129162306a36Sopenharmony_ci	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
129262306a36Sopenharmony_ci	unsigned long setpoint = (freerun + limit) / 2;
129362306a36Sopenharmony_ci	unsigned long write_bw = wb->avg_write_bandwidth;
129462306a36Sopenharmony_ci	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
129562306a36Sopenharmony_ci	unsigned long dirty_rate;
129662306a36Sopenharmony_ci	unsigned long task_ratelimit;
129762306a36Sopenharmony_ci	unsigned long balanced_dirty_ratelimit;
129862306a36Sopenharmony_ci	unsigned long step;
129962306a36Sopenharmony_ci	unsigned long x;
130062306a36Sopenharmony_ci	unsigned long shift;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	/*
130362306a36Sopenharmony_ci	 * The dirty rate will match the writeout rate in long term, except
130462306a36Sopenharmony_ci	 * when dirty pages are truncated by userspace or re-dirtied by FS.
130562306a36Sopenharmony_ci	 */
130662306a36Sopenharmony_ci	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	/*
130962306a36Sopenharmony_ci	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
131062306a36Sopenharmony_ci	 */
131162306a36Sopenharmony_ci	task_ratelimit = (u64)dirty_ratelimit *
131262306a36Sopenharmony_ci					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
131362306a36Sopenharmony_ci	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	/*
131662306a36Sopenharmony_ci	 * A linear estimation of the "balanced" throttle rate. The theory is,
131762306a36Sopenharmony_ci	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
131862306a36Sopenharmony_ci	 * dirty_rate will be measured to be (N * task_ratelimit). So the below
131962306a36Sopenharmony_ci	 * formula will yield the balanced rate limit (write_bw / N).
132062306a36Sopenharmony_ci	 *
132162306a36Sopenharmony_ci	 * Note that the expanded form is not a pure rate feedback:
132262306a36Sopenharmony_ci	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		     (1)
132362306a36Sopenharmony_ci	 * but also takes pos_ratio into account:
132462306a36Sopenharmony_ci	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
132562306a36Sopenharmony_ci	 *
132662306a36Sopenharmony_ci	 * (1) is not realistic because pos_ratio also takes part in balancing
132762306a36Sopenharmony_ci	 * the dirty rate.  Consider the state
132862306a36Sopenharmony_ci	 *	pos_ratio = 0.5						     (3)
132962306a36Sopenharmony_ci	 *	rate = 2 * (write_bw / N)				     (4)
133062306a36Sopenharmony_ci	 * If (1) is used, it will stuck in that state! Because each dd will
133162306a36Sopenharmony_ci	 * be throttled at
133262306a36Sopenharmony_ci	 *	task_ratelimit = pos_ratio * rate = (write_bw / N)	     (5)
133362306a36Sopenharmony_ci	 * yielding
133462306a36Sopenharmony_ci	 *	dirty_rate = N * task_ratelimit = write_bw		     (6)
133562306a36Sopenharmony_ci	 * put (6) into (1) we get
133662306a36Sopenharmony_ci	 *	rate_(i+1) = rate_(i)					     (7)
133762306a36Sopenharmony_ci	 *
133862306a36Sopenharmony_ci	 * So we end up using (2) to always keep
133962306a36Sopenharmony_ci	 *	rate_(i+1) ~= (write_bw / N)				     (8)
134062306a36Sopenharmony_ci	 * regardless of the value of pos_ratio. As long as (8) is satisfied,
134162306a36Sopenharmony_ci	 * pos_ratio is able to drive itself to 1.0, which is not only where
134262306a36Sopenharmony_ci	 * the dirty count meet the setpoint, but also where the slope of
134362306a36Sopenharmony_ci	 * pos_ratio is most flat and hence task_ratelimit is least fluctuated.
134462306a36Sopenharmony_ci	 */
134562306a36Sopenharmony_ci	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
134662306a36Sopenharmony_ci					   dirty_rate | 1);
134762306a36Sopenharmony_ci	/*
134862306a36Sopenharmony_ci	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
134962306a36Sopenharmony_ci	 */
135062306a36Sopenharmony_ci	if (unlikely(balanced_dirty_ratelimit > write_bw))
135162306a36Sopenharmony_ci		balanced_dirty_ratelimit = write_bw;
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	/*
135462306a36Sopenharmony_ci	 * We could safely do this and return immediately:
135562306a36Sopenharmony_ci	 *
135662306a36Sopenharmony_ci	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
135762306a36Sopenharmony_ci	 *
135862306a36Sopenharmony_ci	 * However to get a more stable dirty_ratelimit, the below elaborated
135962306a36Sopenharmony_ci	 * code makes use of task_ratelimit to filter out singular points and
136062306a36Sopenharmony_ci	 * limit the step size.
136162306a36Sopenharmony_ci	 *
136262306a36Sopenharmony_ci	 * The below code essentially only uses the relative value of
136362306a36Sopenharmony_ci	 *
136462306a36Sopenharmony_ci	 *	task_ratelimit - dirty_ratelimit
136562306a36Sopenharmony_ci	 *	= (pos_ratio - 1) * dirty_ratelimit
136662306a36Sopenharmony_ci	 *
136762306a36Sopenharmony_ci	 * which reflects the direction and size of dirty position error.
136862306a36Sopenharmony_ci	 */
136962306a36Sopenharmony_ci
137062306a36Sopenharmony_ci	/*
137162306a36Sopenharmony_ci	 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
137262306a36Sopenharmony_ci	 * task_ratelimit is on the same side of dirty_ratelimit, too.
137362306a36Sopenharmony_ci	 * For example, when
137462306a36Sopenharmony_ci	 * - dirty_ratelimit > balanced_dirty_ratelimit
137562306a36Sopenharmony_ci	 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
137662306a36Sopenharmony_ci	 * lowering dirty_ratelimit will help meet both the position and rate
137762306a36Sopenharmony_ci	 * control targets. Otherwise, don't update dirty_ratelimit if it will
137862306a36Sopenharmony_ci	 * only help meet the rate target. After all, what the users ultimately
137962306a36Sopenharmony_ci	 * feel and care are stable dirty rate and small position error.
138062306a36Sopenharmony_ci	 *
138162306a36Sopenharmony_ci	 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
138262306a36Sopenharmony_ci	 * and filter out the singular points of balanced_dirty_ratelimit. Which
138362306a36Sopenharmony_ci	 * keeps jumping around randomly and can even leap far away at times
138462306a36Sopenharmony_ci	 * due to the small 200ms estimation period of dirty_rate (we want to
138562306a36Sopenharmony_ci	 * keep that period small to reduce time lags).
138662306a36Sopenharmony_ci	 */
138762306a36Sopenharmony_ci	step = 0;
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	/*
139062306a36Sopenharmony_ci	 * For strictlimit case, calculations above were based on wb counters
139162306a36Sopenharmony_ci	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
139262306a36Sopenharmony_ci	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
139362306a36Sopenharmony_ci	 * Hence, to calculate "step" properly, we have to use wb_dirty as
139462306a36Sopenharmony_ci	 * "dirty" and wb_setpoint as "setpoint".
139562306a36Sopenharmony_ci	 *
139662306a36Sopenharmony_ci	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
139762306a36Sopenharmony_ci	 * it's possible that wb_thresh is close to zero due to inactivity
139862306a36Sopenharmony_ci	 * of backing device.
139962306a36Sopenharmony_ci	 */
140062306a36Sopenharmony_ci	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
140162306a36Sopenharmony_ci		dirty = dtc->wb_dirty;
140262306a36Sopenharmony_ci		if (dtc->wb_dirty < 8)
140362306a36Sopenharmony_ci			setpoint = dtc->wb_dirty + 1;
140462306a36Sopenharmony_ci		else
140562306a36Sopenharmony_ci			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
140662306a36Sopenharmony_ci	}
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci	if (dirty < setpoint) {
140962306a36Sopenharmony_ci		x = min3(wb->balanced_dirty_ratelimit,
141062306a36Sopenharmony_ci			 balanced_dirty_ratelimit, task_ratelimit);
141162306a36Sopenharmony_ci		if (dirty_ratelimit < x)
141262306a36Sopenharmony_ci			step = x - dirty_ratelimit;
141362306a36Sopenharmony_ci	} else {
141462306a36Sopenharmony_ci		x = max3(wb->balanced_dirty_ratelimit,
141562306a36Sopenharmony_ci			 balanced_dirty_ratelimit, task_ratelimit);
141662306a36Sopenharmony_ci		if (dirty_ratelimit > x)
141762306a36Sopenharmony_ci			step = dirty_ratelimit - x;
141862306a36Sopenharmony_ci	}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci	/*
142162306a36Sopenharmony_ci	 * Don't pursue 100% rate matching. It's impossible since the balanced
142262306a36Sopenharmony_ci	 * rate itself is constantly fluctuating. So decrease the track speed
142362306a36Sopenharmony_ci	 * when it gets close to the target. Helps eliminate pointless tremors.
142462306a36Sopenharmony_ci	 */
142562306a36Sopenharmony_ci	shift = dirty_ratelimit / (2 * step + 1);
142662306a36Sopenharmony_ci	if (shift < BITS_PER_LONG)
142762306a36Sopenharmony_ci		step = DIV_ROUND_UP(step >> shift, 8);
142862306a36Sopenharmony_ci	else
142962306a36Sopenharmony_ci		step = 0;
143062306a36Sopenharmony_ci
143162306a36Sopenharmony_ci	if (dirty_ratelimit < balanced_dirty_ratelimit)
143262306a36Sopenharmony_ci		dirty_ratelimit += step;
143362306a36Sopenharmony_ci	else
143462306a36Sopenharmony_ci		dirty_ratelimit -= step;
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci	WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
143762306a36Sopenharmony_ci	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_cistatic void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
144362306a36Sopenharmony_ci				  struct dirty_throttle_control *mdtc,
144462306a36Sopenharmony_ci				  bool update_ratelimit)
144562306a36Sopenharmony_ci{
144662306a36Sopenharmony_ci	struct bdi_writeback *wb = gdtc->wb;
144762306a36Sopenharmony_ci	unsigned long now = jiffies;
144862306a36Sopenharmony_ci	unsigned long elapsed;
144962306a36Sopenharmony_ci	unsigned long dirtied;
145062306a36Sopenharmony_ci	unsigned long written;
145162306a36Sopenharmony_ci
145262306a36Sopenharmony_ci	spin_lock(&wb->list_lock);
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ci	/*
145562306a36Sopenharmony_ci	 * Lockless checks for elapsed time are racy and delayed update after
145662306a36Sopenharmony_ci	 * IO completion doesn't do it at all (to make sure written pages are
145762306a36Sopenharmony_ci	 * accounted reasonably quickly). Make sure elapsed >= 1 to avoid
145862306a36Sopenharmony_ci	 * division errors.
145962306a36Sopenharmony_ci	 */
146062306a36Sopenharmony_ci	elapsed = max(now - wb->bw_time_stamp, 1UL);
146162306a36Sopenharmony_ci	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
146262306a36Sopenharmony_ci	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
146362306a36Sopenharmony_ci
146462306a36Sopenharmony_ci	if (update_ratelimit) {
146562306a36Sopenharmony_ci		domain_update_dirty_limit(gdtc, now);
146662306a36Sopenharmony_ci		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
146762306a36Sopenharmony_ci
146862306a36Sopenharmony_ci		/*
146962306a36Sopenharmony_ci		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
147062306a36Sopenharmony_ci		 * compiler has no way to figure that out.  Help it.
147162306a36Sopenharmony_ci		 */
147262306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
147362306a36Sopenharmony_ci			domain_update_dirty_limit(mdtc, now);
147462306a36Sopenharmony_ci			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
147562306a36Sopenharmony_ci		}
147662306a36Sopenharmony_ci	}
147762306a36Sopenharmony_ci	wb_update_write_bandwidth(wb, elapsed, written);
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	wb->dirtied_stamp = dirtied;
148062306a36Sopenharmony_ci	wb->written_stamp = written;
148162306a36Sopenharmony_ci	WRITE_ONCE(wb->bw_time_stamp, now);
148262306a36Sopenharmony_ci	spin_unlock(&wb->list_lock);
148362306a36Sopenharmony_ci}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_civoid wb_update_bandwidth(struct bdi_writeback *wb)
148662306a36Sopenharmony_ci{
148762306a36Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
148862306a36Sopenharmony_ci
148962306a36Sopenharmony_ci	__wb_update_bandwidth(&gdtc, NULL, false);
149062306a36Sopenharmony_ci}
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci/* Interval after which we consider wb idle and don't estimate bandwidth */
149362306a36Sopenharmony_ci#define WB_BANDWIDTH_IDLE_JIF (HZ)
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_cistatic void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
149662306a36Sopenharmony_ci{
149762306a36Sopenharmony_ci	unsigned long now = jiffies;
149862306a36Sopenharmony_ci	unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp);
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	if (elapsed > WB_BANDWIDTH_IDLE_JIF &&
150162306a36Sopenharmony_ci	    !atomic_read(&wb->writeback_inodes)) {
150262306a36Sopenharmony_ci		spin_lock(&wb->list_lock);
150362306a36Sopenharmony_ci		wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
150462306a36Sopenharmony_ci		wb->written_stamp = wb_stat(wb, WB_WRITTEN);
150562306a36Sopenharmony_ci		WRITE_ONCE(wb->bw_time_stamp, now);
150662306a36Sopenharmony_ci		spin_unlock(&wb->list_lock);
150762306a36Sopenharmony_ci	}
150862306a36Sopenharmony_ci}
150962306a36Sopenharmony_ci
/*
 * Number of pages a task may dirty before balance_dirty_pages_ratelimited()
 * checks whether dirty throttling needs to start.
 *
 * If the interval is too low, big NUMA machines will call the expensive
 * global_zone_page_state() too often. So scale it near-sqrt to the safety
 * margin (the number of pages we may dirty without exceeding the dirty
 * limits): the result is 2^(ilog2(thresh - dirty) / 2), or 1 when there is no
 * margin left.
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	unsigned long margin;

	if (thresh <= dirty)
		return 1;

	margin = thresh - dirty;
	return 1UL << (ilog2(margin) >> 1);
}
152662306a36Sopenharmony_ci
152762306a36Sopenharmony_cistatic unsigned long wb_max_pause(struct bdi_writeback *wb,
152862306a36Sopenharmony_ci				  unsigned long wb_dirty)
152962306a36Sopenharmony_ci{
153062306a36Sopenharmony_ci	unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
153162306a36Sopenharmony_ci	unsigned long t;
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_ci	/*
153462306a36Sopenharmony_ci	 * Limit pause time for small memory systems. If sleeping for too long
153562306a36Sopenharmony_ci	 * time, a small pool of dirty/writeback pages may go empty and disk go
153662306a36Sopenharmony_ci	 * idle.
153762306a36Sopenharmony_ci	 *
153862306a36Sopenharmony_ci	 * 8 serves as the safety ratio.
153962306a36Sopenharmony_ci	 */
154062306a36Sopenharmony_ci	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
154162306a36Sopenharmony_ci	t++;
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci	return min_t(unsigned long, t, MAX_PAUSE);
154462306a36Sopenharmony_ci}
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_cistatic long wb_min_pause(struct bdi_writeback *wb,
154762306a36Sopenharmony_ci			 long max_pause,
154862306a36Sopenharmony_ci			 unsigned long task_ratelimit,
154962306a36Sopenharmony_ci			 unsigned long dirty_ratelimit,
155062306a36Sopenharmony_ci			 int *nr_dirtied_pause)
155162306a36Sopenharmony_ci{
155262306a36Sopenharmony_ci	long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
155362306a36Sopenharmony_ci	long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
155462306a36Sopenharmony_ci	long t;		/* target pause */
155562306a36Sopenharmony_ci	long pause;	/* estimated next pause */
155662306a36Sopenharmony_ci	int pages;	/* target nr_dirtied_pause */
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	/* target for 10ms pause on 1-dd case */
155962306a36Sopenharmony_ci	t = max(1, HZ / 100);
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ci	/*
156262306a36Sopenharmony_ci	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
156362306a36Sopenharmony_ci	 * overheads.
156462306a36Sopenharmony_ci	 *
156562306a36Sopenharmony_ci	 * (N * 10ms) on 2^N concurrent tasks.
156662306a36Sopenharmony_ci	 */
156762306a36Sopenharmony_ci	if (hi > lo)
156862306a36Sopenharmony_ci		t += (hi - lo) * (10 * HZ) / 1024;
156962306a36Sopenharmony_ci
157062306a36Sopenharmony_ci	/*
157162306a36Sopenharmony_ci	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
157262306a36Sopenharmony_ci	 * on the much more stable dirty_ratelimit. However the next pause time
157362306a36Sopenharmony_ci	 * will be computed based on task_ratelimit and the two rate limits may
157462306a36Sopenharmony_ci	 * depart considerably at some time. Especially if task_ratelimit goes
157562306a36Sopenharmony_ci	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
157662306a36Sopenharmony_ci	 * pause time will be max_pause*2 _trimmed down_ to max_pause.  As a
157762306a36Sopenharmony_ci	 * result task_ratelimit won't be executed faithfully, which could
157862306a36Sopenharmony_ci	 * eventually bring down dirty_ratelimit.
157962306a36Sopenharmony_ci	 *
158062306a36Sopenharmony_ci	 * We apply two rules to fix it up:
158162306a36Sopenharmony_ci	 * 1) try to estimate the next pause time and if necessary, use a lower
158262306a36Sopenharmony_ci	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
158362306a36Sopenharmony_ci	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
158462306a36Sopenharmony_ci	 * 2) limit the target pause time to max_pause/2, so that the normal
158562306a36Sopenharmony_ci	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
158662306a36Sopenharmony_ci	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
158762306a36Sopenharmony_ci	 */
158862306a36Sopenharmony_ci	t = min(t, 1 + max_pause / 2);
158962306a36Sopenharmony_ci	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci	/*
159262306a36Sopenharmony_ci	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
159362306a36Sopenharmony_ci	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
159462306a36Sopenharmony_ci	 * When the 16 consecutive reads are often interrupted by some dirty
159562306a36Sopenharmony_ci	 * throttling pause during the async writes, cfq will go into idles
159662306a36Sopenharmony_ci	 * (deadline is fine). So push nr_dirtied_pause as high as possible
159762306a36Sopenharmony_ci	 * until reaches DIRTY_POLL_THRESH=32 pages.
159862306a36Sopenharmony_ci	 */
159962306a36Sopenharmony_ci	if (pages < DIRTY_POLL_THRESH) {
160062306a36Sopenharmony_ci		t = max_pause;
160162306a36Sopenharmony_ci		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
160262306a36Sopenharmony_ci		if (pages > DIRTY_POLL_THRESH) {
160362306a36Sopenharmony_ci			pages = DIRTY_POLL_THRESH;
160462306a36Sopenharmony_ci			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
160562306a36Sopenharmony_ci		}
160662306a36Sopenharmony_ci	}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	pause = HZ * pages / (task_ratelimit + 1);
160962306a36Sopenharmony_ci	if (pause > max_pause) {
161062306a36Sopenharmony_ci		t = max_pause;
161162306a36Sopenharmony_ci		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
161262306a36Sopenharmony_ci	}
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci	*nr_dirtied_pause = pages;
161562306a36Sopenharmony_ci	/*
161662306a36Sopenharmony_ci	 * The minimal pause time will normally be half the target pause time.
161762306a36Sopenharmony_ci	 */
161862306a36Sopenharmony_ci	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
161962306a36Sopenharmony_ci}
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_cistatic inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
162262306a36Sopenharmony_ci{
162362306a36Sopenharmony_ci	struct bdi_writeback *wb = dtc->wb;
162462306a36Sopenharmony_ci	unsigned long wb_reclaimable;
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	/*
162762306a36Sopenharmony_ci	 * wb_thresh is not treated as some limiting factor as
162862306a36Sopenharmony_ci	 * dirty_thresh, due to reasons
162962306a36Sopenharmony_ci	 * - in JBOD setup, wb_thresh can fluctuate a lot
163062306a36Sopenharmony_ci	 * - in a system with HDD and USB key, the USB key may somehow
163162306a36Sopenharmony_ci	 *   go into state (wb_dirty >> wb_thresh) either because
163262306a36Sopenharmony_ci	 *   wb_dirty starts high, or because wb_thresh drops low.
163362306a36Sopenharmony_ci	 *   In this case we don't want to hard throttle the USB key
163462306a36Sopenharmony_ci	 *   dirtiers for 100 seconds until wb_dirty drops under
163562306a36Sopenharmony_ci	 *   wb_thresh. Instead the auxiliary wb control line in
163662306a36Sopenharmony_ci	 *   wb_position_ratio() will let the dirtier task progress
163762306a36Sopenharmony_ci	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
163862306a36Sopenharmony_ci	 */
163962306a36Sopenharmony_ci	dtc->wb_thresh = __wb_calc_thresh(dtc);
164062306a36Sopenharmony_ci	dtc->wb_bg_thresh = dtc->thresh ?
164162306a36Sopenharmony_ci		div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	/*
164462306a36Sopenharmony_ci	 * In order to avoid the stacked BDI deadlock we need
164562306a36Sopenharmony_ci	 * to ensure we accurately count the 'dirty' pages when
164662306a36Sopenharmony_ci	 * the threshold is low.
164762306a36Sopenharmony_ci	 *
164862306a36Sopenharmony_ci	 * Otherwise it would be possible to get thresh+n pages
164962306a36Sopenharmony_ci	 * reported dirty, even though there are thresh-m pages
165062306a36Sopenharmony_ci	 * actually dirty; with m+n sitting in the percpu
165162306a36Sopenharmony_ci	 * deltas.
165262306a36Sopenharmony_ci	 */
165362306a36Sopenharmony_ci	if (dtc->wb_thresh < 2 * wb_stat_error()) {
165462306a36Sopenharmony_ci		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
165562306a36Sopenharmony_ci		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
165662306a36Sopenharmony_ci	} else {
165762306a36Sopenharmony_ci		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
165862306a36Sopenharmony_ci		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
165962306a36Sopenharmony_ci	}
166062306a36Sopenharmony_ci}
166162306a36Sopenharmony_ci
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 *
 * @wb:            bdi_writeback the caller is dirtying pages against; its
 *                 counters, bandwidth timestamp and dirty_ratelimit are read.
 * @pages_dirtied: number of pages the caller has dirtied; used to derive the
 *                 length of the pause (HZ * pages_dirtied / task_ratelimit).
 * @flags:         BDP flags; only BDP_ASYNC is examined here.
 *
 * Return: 0 normally; -EAGAIN when BDP_ASYNC is set in @flags and the function
 * would otherwise have had to sleep.
 */
static int balance_dirty_pages(struct bdi_writeback *wb,
			       unsigned long pages_dirtied, unsigned int flags)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	/* mdtc is NULL when mdtc_valid() rejects the memcg control block */
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	/* the dtc (global or memcg) that ends up driving the throttling */
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;	/* = file_dirty */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	/* sampled once on entry; only passed to the tracepoints below */
	unsigned long start_time = jiffies;
	int ret = 0;

	/*
	 * Every iteration re-samples the counters and limits from scratch,
	 * then either leaves the loop or sleeps and tries again.
	 */
	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		/*
		 * Pick the numbers the freerun check below is based on:
		 * per-wb counters/limits for strictlimit, domain-wide ones
		 * otherwise.
		 */
		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * In laptop mode, we wait until hitting the higher threshold
		 * before starting background writeout, and then write out all
		 * the way down to the lower threshold.  So slow writers cause
		 * minimal disk activity.
		 *
		 * In normal mode, we start background writeout at the lower
		 * background_thresh, to keep the amount of dirty memory low.
		 */
		if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh &&
		    !writeback_in_progress(wb))
			wb_start_background_writeback(wb);

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv;
			unsigned long m_intv;

			/*
			 * No throttling: reset the task's dirty accounting and
			 * set its next poll point to the smaller of the global
			 * and (if present) memcg poll intervals.  Also reached
			 * via goto from the PF_LOCAL_THROTTLE checks below.
			 */
free_running:
			intv = dirty_poll_interval(dirty, thresh);
			m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		/* Start writeback even when in laptop mode */
		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		mem_cgroup_flush_foreign(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit) {
			/* in the strictlimit case this was already done above */
			wb_dirty_limits(gdtc);

			if ((current->flags & PF_LOCAL_THROTTLE) &&
			    gdtc->wb_dirty <
			    dirty_freerun_ceiling(gdtc->wb_thresh,
						  gdtc->wb_bg_thresh))
				/*
				 * LOCAL_THROTTLE tasks must not be throttled
				 * when below the per-wb freerun ceiling.
				 */
				goto free_running;
		}

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * w/ lower pos_ratio.
			 */
			if (!strictlimit) {
				wb_dirty_limits(mdtc);

				if ((current->flags & PF_LOCAL_THROTTLE) &&
				    mdtc->wb_dirty <
				    dirty_freerun_ceiling(mdtc->wb_thresh,
							  mdtc->wb_bg_thresh))
					/*
					 * LOCAL_THROTTLE tasks must not be
					 * throttled when below the per-wb
					 * freerun ceiling.
					 */
					goto free_running;
			}
			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		/*
		 * Store only on change.
		 * NOTE(review): presumably this avoids needless writes to a
		 * field shared between CPUs - confirm.
		 */
		if (dirty_exceeded != wb->dirty_exceeded)
			wb->dirty_exceeded = dirty_exceeded;

		/* refresh the bandwidth estimate once BANDWIDTH_INTERVAL has elapsed */
		if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
					   BANDWIDTH_INTERVAL))
			__wb_update_bandwidth(gdtc, mdtc, true);

		/* throttle according to the chosen dtc */
		dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		/*
		 * A zero task_ratelimit would make the division below
		 * impossible; sleep for the maximum pause instead.
		 */
		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		/* credit the time that has passed since the last pause point */
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as it may be a light dirtier.
		 */
		if (pause < min_pause) {
			/* traced pause is clamped to be <= 0: no sleep happens here */
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				/* more than a second behind: reset the virtual time */
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				/* advance the virtual time by one period */
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				/* period rounded down to 0: allow more pages before the next check */
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		/* BDP_ASYNC callers get -EAGAIN instead of being put to sleep */
		if (flags & BDP_ASYNC) {
			ret = -EAGAIN;
			break;
		}
		/* killable sleep for @pause jiffies; record when this bdi last slept here */
		__set_current_state(TASK_KILLABLE);
		bdi->last_bdp_sleep = jiffies;
		io_schedule_timeout(pause);

		/* restart the task's accounting from the end of this pause */
		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error())
			break;

		/* stop looping once a fatal signal is pending for this task */
		if (fatal_signal_pending(current))
			break;
	}
	return ret;
}
195662306a36Sopenharmony_ci
/*
 * Per-CPU counter checked by balance_dirty_pages_ratelimited_flags(): it is
 * reset to 0 there when the current task has reached its ratelimit, or when
 * the counter itself has reached ratelimit_pages (which forces a call into
 * balance_dirty_pages()).  It is incremented outside this part of the file.
 */
static DEFINE_PER_CPU(int, bdp_ratelimits);
195862306a36Sopenharmony_ci
/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * However there is a worst case. If every task exits immediately after
 * dirtying (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never
 * be called to throttle the page dirties. The solution is to save the not yet
 * throttled page dirties in dirty_throttle_leaks on task exit and charge them
 * randomly to the running tasks. This works well for the above worst case,
 * as the new task will pick up and accumulate the old task's leaked dirty
 * count and eventually get throttled.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci/**
197662306a36Sopenharmony_ci * balance_dirty_pages_ratelimited_flags - Balance dirty memory state.
197762306a36Sopenharmony_ci * @mapping: address_space which was dirtied.
197862306a36Sopenharmony_ci * @flags: BDP flags.
197962306a36Sopenharmony_ci *
198062306a36Sopenharmony_ci * Processes which are dirtying memory should call in here once for each page
198162306a36Sopenharmony_ci * which was newly dirtied.  The function will periodically check the system's
198262306a36Sopenharmony_ci * dirty state and will initiate writeback if needed.
198362306a36Sopenharmony_ci *
198462306a36Sopenharmony_ci * See balance_dirty_pages_ratelimited() for details.
198562306a36Sopenharmony_ci *
198662306a36Sopenharmony_ci * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to
198762306a36Sopenharmony_ci * indicate that memory is out of balance and the caller must wait
198862306a36Sopenharmony_ci * for I/O to complete.  Otherwise, it will return 0 to indicate
198962306a36Sopenharmony_ci * that either memory was already in balance, or it was able to sleep
199062306a36Sopenharmony_ci * until the amount of dirty memory returned to balance.
199162306a36Sopenharmony_ci */
199262306a36Sopenharmony_ciint balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
199362306a36Sopenharmony_ci					unsigned int flags)
199462306a36Sopenharmony_ci{
199562306a36Sopenharmony_ci	struct inode *inode = mapping->host;
199662306a36Sopenharmony_ci	struct backing_dev_info *bdi = inode_to_bdi(inode);
199762306a36Sopenharmony_ci	struct bdi_writeback *wb = NULL;
199862306a36Sopenharmony_ci	int ratelimit;
199962306a36Sopenharmony_ci	int ret = 0;
200062306a36Sopenharmony_ci	int *p;
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci	if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
200362306a36Sopenharmony_ci		return ret;
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	if (inode_cgwb_enabled(inode))
200662306a36Sopenharmony_ci		wb = wb_get_create_current(bdi, GFP_KERNEL);
200762306a36Sopenharmony_ci	if (!wb)
200862306a36Sopenharmony_ci		wb = &bdi->wb;
200962306a36Sopenharmony_ci
201062306a36Sopenharmony_ci	ratelimit = current->nr_dirtied_pause;
201162306a36Sopenharmony_ci	if (wb->dirty_exceeded)
201262306a36Sopenharmony_ci		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci	preempt_disable();
201562306a36Sopenharmony_ci	/*
201662306a36Sopenharmony_ci	 * This prevents one CPU to accumulate too many dirtied pages without
201762306a36Sopenharmony_ci	 * calling into balance_dirty_pages(), which can happen when there are
201862306a36Sopenharmony_ci	 * 1000+ tasks, all of them start dirtying pages at exactly the same
201962306a36Sopenharmony_ci	 * time, hence all honoured too large initial task->nr_dirtied_pause.
202062306a36Sopenharmony_ci	 */
202162306a36Sopenharmony_ci	p =  this_cpu_ptr(&bdp_ratelimits);
202262306a36Sopenharmony_ci	if (unlikely(current->nr_dirtied >= ratelimit))
202362306a36Sopenharmony_ci		*p = 0;
202462306a36Sopenharmony_ci	else if (unlikely(*p >= ratelimit_pages)) {
202562306a36Sopenharmony_ci		*p = 0;
202662306a36Sopenharmony_ci		ratelimit = 0;
202762306a36Sopenharmony_ci	}
202862306a36Sopenharmony_ci	/*
202962306a36Sopenharmony_ci	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
203062306a36Sopenharmony_ci	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
203162306a36Sopenharmony_ci	 * the dirty throttling and livelock other long-run dirtiers.
203262306a36Sopenharmony_ci	 */
203362306a36Sopenharmony_ci	p = this_cpu_ptr(&dirty_throttle_leaks);
203462306a36Sopenharmony_ci	if (*p > 0 && current->nr_dirtied < ratelimit) {
203562306a36Sopenharmony_ci		unsigned long nr_pages_dirtied;
203662306a36Sopenharmony_ci		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
203762306a36Sopenharmony_ci		*p -= nr_pages_dirtied;
203862306a36Sopenharmony_ci		current->nr_dirtied += nr_pages_dirtied;
203962306a36Sopenharmony_ci	}
204062306a36Sopenharmony_ci	preempt_enable();
204162306a36Sopenharmony_ci
204262306a36Sopenharmony_ci	if (unlikely(current->nr_dirtied >= ratelimit))
204362306a36Sopenharmony_ci		ret = balance_dirty_pages(wb, current->nr_dirtied, flags);
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_ci	wb_put(wb);
204662306a36Sopenharmony_ci	return ret;
204762306a36Sopenharmony_ci}
204862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited_flags);
204962306a36Sopenharmony_ci
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state.
 * @mapping: address_space which was dirtied.
 *
 * A task that dirties memory should call this once for every page it newly
 * dirties.  From time to time the call inspects the system's dirty state and
 * kicks off writeback when that is required.
 *
 * Once the dirty memory limit has been exceeded, the ratelimit is lowered
 * sharply so that individual processes cannot each overshoot the limit
 * by (ratelimit_pages).
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	/* With no BDP flags the flags variant can only return 0; drop it. */
	(void)balance_dirty_pages_ratelimited_flags(mapping, 0);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
206762306a36Sopenharmony_ci
206862306a36Sopenharmony_ci/**
206962306a36Sopenharmony_ci * wb_over_bg_thresh - does @wb need to be written back?
207062306a36Sopenharmony_ci * @wb: bdi_writeback of interest
207162306a36Sopenharmony_ci *
207262306a36Sopenharmony_ci * Determines whether background writeback should keep writing @wb or it's
207362306a36Sopenharmony_ci * clean enough.
207462306a36Sopenharmony_ci *
207562306a36Sopenharmony_ci * Return: %true if writeback should continue.
207662306a36Sopenharmony_ci */
207762306a36Sopenharmony_cibool wb_over_bg_thresh(struct bdi_writeback *wb)
207862306a36Sopenharmony_ci{
207962306a36Sopenharmony_ci	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
208062306a36Sopenharmony_ci	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
208162306a36Sopenharmony_ci	struct dirty_throttle_control * const gdtc = &gdtc_stor;
208262306a36Sopenharmony_ci	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
208362306a36Sopenharmony_ci						     &mdtc_stor : NULL;
208462306a36Sopenharmony_ci	unsigned long reclaimable;
208562306a36Sopenharmony_ci	unsigned long thresh;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	/*
208862306a36Sopenharmony_ci	 * Similar to balance_dirty_pages() but ignores pages being written
208962306a36Sopenharmony_ci	 * as we're trying to decide whether to put more under writeback.
209062306a36Sopenharmony_ci	 */
209162306a36Sopenharmony_ci	gdtc->avail = global_dirtyable_memory();
209262306a36Sopenharmony_ci	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
209362306a36Sopenharmony_ci	domain_dirty_limits(gdtc);
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ci	if (gdtc->dirty > gdtc->bg_thresh)
209662306a36Sopenharmony_ci		return true;
209762306a36Sopenharmony_ci
209862306a36Sopenharmony_ci	thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
209962306a36Sopenharmony_ci	if (thresh < 2 * wb_stat_error())
210062306a36Sopenharmony_ci		reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
210162306a36Sopenharmony_ci	else
210262306a36Sopenharmony_ci		reclaimable = wb_stat(wb, WB_RECLAIMABLE);
210362306a36Sopenharmony_ci
210462306a36Sopenharmony_ci	if (reclaimable > thresh)
210562306a36Sopenharmony_ci		return true;
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_ci	if (mdtc) {
210862306a36Sopenharmony_ci		unsigned long filepages, headroom, writeback;
210962306a36Sopenharmony_ci
211062306a36Sopenharmony_ci		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
211162306a36Sopenharmony_ci				    &writeback);
211262306a36Sopenharmony_ci		mdtc_calc_avail(mdtc, filepages, headroom);
211362306a36Sopenharmony_ci		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci		if (mdtc->dirty > mdtc->bg_thresh)
211662306a36Sopenharmony_ci			return true;
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci		thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
211962306a36Sopenharmony_ci		if (thresh < 2 * wb_stat_error())
212062306a36Sopenharmony_ci			reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
212162306a36Sopenharmony_ci		else
212262306a36Sopenharmony_ci			reclaimable = wb_stat(wb, WB_RECLAIMABLE);
212362306a36Sopenharmony_ci
212462306a36Sopenharmony_ci		if (reclaimable > thresh)
212562306a36Sopenharmony_ci			return true;
212662306a36Sopenharmony_ci	}
212762306a36Sopenharmony_ci
212862306a36Sopenharmony_ci	return false;
212962306a36Sopenharmony_ci}
213062306a36Sopenharmony_ci
#ifdef CONFIG_SYSCTL
/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 *
 * Returns whatever proc_dointvec() returned.
 */
static int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	const unsigned int prev_interval = dirty_writeback_interval;
	int err = proc_dointvec(table, write, buffer, length, ppos);

	/* Reads and failed writes need no follow-up. */
	if (err || !write)
		return err;

	/*
	 * A write of 0 to dirty_writeback_interval turns periodic writeback
	 * off, so nothing is woken in that case.  Any other value that
	 * differs from the previous one wakes the writeback threads so that
	 * the new interval takes effect right away.  wb_wakeup_delayed()
	 * would be the more fitting call, but walking every bdi and wb to
	 * use it is a pain.
	 */
	if (dirty_writeback_interval &&
	    dirty_writeback_interval != prev_interval)
		wakeup_flusher_threads(WB_REASON_PERIODIC);

	return 0;
}
#endif
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_civoid laptop_mode_timer_fn(struct timer_list *t)
215962306a36Sopenharmony_ci{
216062306a36Sopenharmony_ci	struct backing_dev_info *backing_dev_info =
216162306a36Sopenharmony_ci		from_timer(backing_dev_info, t, laptop_mode_wb_timer);
216262306a36Sopenharmony_ci
216362306a36Sopenharmony_ci	wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
216462306a36Sopenharmony_ci}
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ci/*
216762306a36Sopenharmony_ci * We've spun up the disk and we're in laptop mode: schedule writeback
216862306a36Sopenharmony_ci * of all dirty data a few seconds from now.  If the flush is already scheduled
216962306a36Sopenharmony_ci * then push it back - the user is still using the disk.
217062306a36Sopenharmony_ci */
217162306a36Sopenharmony_civoid laptop_io_completion(struct backing_dev_info *info)
217262306a36Sopenharmony_ci{
217362306a36Sopenharmony_ci	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
217462306a36Sopenharmony_ci}
217562306a36Sopenharmony_ci
217662306a36Sopenharmony_ci/*
217762306a36Sopenharmony_ci * We're in laptop mode and we've just synced. The sync's writes will have
217862306a36Sopenharmony_ci * caused another writeback to be scheduled by laptop_io_completion.
217962306a36Sopenharmony_ci * Nothing needs to be written back anymore, so we unschedule the writeback.
218062306a36Sopenharmony_ci */
218162306a36Sopenharmony_civoid laptop_sync_completion(void)
218262306a36Sopenharmony_ci{
218362306a36Sopenharmony_ci	struct backing_dev_info *bdi;
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_ci	rcu_read_lock();
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_ci	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
218862306a36Sopenharmony_ci		del_timer(&bdi->laptop_mode_wb_timer);
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci	rcu_read_unlock();
219162306a36Sopenharmony_ci}
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci/*
219462306a36Sopenharmony_ci * If ratelimit_pages is too high then we can get into dirty-data overload
219562306a36Sopenharmony_ci * if a large number of processes all perform writes at the same time.
219662306a36Sopenharmony_ci *
219762306a36Sopenharmony_ci * Here we set ratelimit_pages to a level which ensures that when all CPUs are
219862306a36Sopenharmony_ci * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
219962306a36Sopenharmony_ci * thresholds.
220062306a36Sopenharmony_ci */
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_civoid writeback_set_ratelimit(void)
220362306a36Sopenharmony_ci{
220462306a36Sopenharmony_ci	struct wb_domain *dom = &global_wb_domain;
220562306a36Sopenharmony_ci	unsigned long background_thresh;
220662306a36Sopenharmony_ci	unsigned long dirty_thresh;
220762306a36Sopenharmony_ci
220862306a36Sopenharmony_ci	global_dirty_limits(&background_thresh, &dirty_thresh);
220962306a36Sopenharmony_ci	dom->dirty_limit = dirty_thresh;
221062306a36Sopenharmony_ci	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
221162306a36Sopenharmony_ci	if (ratelimit_pages < 16)
221262306a36Sopenharmony_ci		ratelimit_pages = 16;
221362306a36Sopenharmony_ci}
221462306a36Sopenharmony_ci
/*
 * CPU-online callback: recompute ratelimit_pages, which depends on the number
 * of online CPUs.  @cpu itself is not used.  Always returns 0.
 */
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();

	return 0;
}
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
222262306a36Sopenharmony_ci
/*
 * This is needed for the proc_doulongvec_minmax of vm_dirty_bytes: the
 * "dirty_bytes" sysctl entry below points its .extra1 (lower bound) at this
 * object, so the minimum accepted value is two pages' worth of bytes.
 */
static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_cistatic struct ctl_table vm_page_writeback_sysctls[] = {
222762306a36Sopenharmony_ci	{
222862306a36Sopenharmony_ci		.procname   = "dirty_background_ratio",
222962306a36Sopenharmony_ci		.data       = &dirty_background_ratio,
223062306a36Sopenharmony_ci		.maxlen     = sizeof(dirty_background_ratio),
223162306a36Sopenharmony_ci		.mode       = 0644,
223262306a36Sopenharmony_ci		.proc_handler   = dirty_background_ratio_handler,
223362306a36Sopenharmony_ci		.extra1     = SYSCTL_ZERO,
223462306a36Sopenharmony_ci		.extra2     = SYSCTL_ONE_HUNDRED,
223562306a36Sopenharmony_ci	},
223662306a36Sopenharmony_ci	{
223762306a36Sopenharmony_ci		.procname   = "dirty_background_bytes",
223862306a36Sopenharmony_ci		.data       = &dirty_background_bytes,
223962306a36Sopenharmony_ci		.maxlen     = sizeof(dirty_background_bytes),
224062306a36Sopenharmony_ci		.mode       = 0644,
224162306a36Sopenharmony_ci		.proc_handler   = dirty_background_bytes_handler,
224262306a36Sopenharmony_ci		.extra1     = SYSCTL_LONG_ONE,
224362306a36Sopenharmony_ci	},
224462306a36Sopenharmony_ci	{
224562306a36Sopenharmony_ci		.procname   = "dirty_ratio",
224662306a36Sopenharmony_ci		.data       = &vm_dirty_ratio,
224762306a36Sopenharmony_ci		.maxlen     = sizeof(vm_dirty_ratio),
224862306a36Sopenharmony_ci		.mode       = 0644,
224962306a36Sopenharmony_ci		.proc_handler   = dirty_ratio_handler,
225062306a36Sopenharmony_ci		.extra1     = SYSCTL_ZERO,
225162306a36Sopenharmony_ci		.extra2     = SYSCTL_ONE_HUNDRED,
225262306a36Sopenharmony_ci	},
225362306a36Sopenharmony_ci	{
225462306a36Sopenharmony_ci		.procname   = "dirty_bytes",
225562306a36Sopenharmony_ci		.data       = &vm_dirty_bytes,
225662306a36Sopenharmony_ci		.maxlen     = sizeof(vm_dirty_bytes),
225762306a36Sopenharmony_ci		.mode       = 0644,
225862306a36Sopenharmony_ci		.proc_handler   = dirty_bytes_handler,
225962306a36Sopenharmony_ci		.extra1     = (void *)&dirty_bytes_min,
226062306a36Sopenharmony_ci	},
226162306a36Sopenharmony_ci	{
226262306a36Sopenharmony_ci		.procname   = "dirty_writeback_centisecs",
226362306a36Sopenharmony_ci		.data       = &dirty_writeback_interval,
226462306a36Sopenharmony_ci		.maxlen     = sizeof(dirty_writeback_interval),
226562306a36Sopenharmony_ci		.mode       = 0644,
226662306a36Sopenharmony_ci		.proc_handler   = dirty_writeback_centisecs_handler,
226762306a36Sopenharmony_ci	},
226862306a36Sopenharmony_ci	{
226962306a36Sopenharmony_ci		.procname   = "dirty_expire_centisecs",
227062306a36Sopenharmony_ci		.data       = &dirty_expire_interval,
227162306a36Sopenharmony_ci		.maxlen     = sizeof(dirty_expire_interval),
227262306a36Sopenharmony_ci		.mode       = 0644,
227362306a36Sopenharmony_ci		.proc_handler   = proc_dointvec_minmax,
227462306a36Sopenharmony_ci		.extra1     = SYSCTL_ZERO,
227562306a36Sopenharmony_ci	},
227662306a36Sopenharmony_ci#ifdef CONFIG_HIGHMEM
227762306a36Sopenharmony_ci	{
227862306a36Sopenharmony_ci		.procname	= "highmem_is_dirtyable",
227962306a36Sopenharmony_ci		.data		= &vm_highmem_is_dirtyable,
228062306a36Sopenharmony_ci		.maxlen		= sizeof(vm_highmem_is_dirtyable),
228162306a36Sopenharmony_ci		.mode		= 0644,
228262306a36Sopenharmony_ci		.proc_handler	= proc_dointvec_minmax,
228362306a36Sopenharmony_ci		.extra1		= SYSCTL_ZERO,
228462306a36Sopenharmony_ci		.extra2		= SYSCTL_ONE,
228562306a36Sopenharmony_ci	},
228662306a36Sopenharmony_ci#endif
228762306a36Sopenharmony_ci	{
228862306a36Sopenharmony_ci		.procname	= "laptop_mode",
228962306a36Sopenharmony_ci		.data		= &laptop_mode,
229062306a36Sopenharmony_ci		.maxlen		= sizeof(laptop_mode),
229162306a36Sopenharmony_ci		.mode		= 0644,
229262306a36Sopenharmony_ci		.proc_handler	= proc_dointvec_jiffies,
229362306a36Sopenharmony_ci	},
229462306a36Sopenharmony_ci	{}
229562306a36Sopenharmony_ci};
229662306a36Sopenharmony_ci#endif
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci/*
229962306a36Sopenharmony_ci * Called early on to tune the page writeback dirty limits.
230062306a36Sopenharmony_ci *
230162306a36Sopenharmony_ci * We used to scale dirty pages according to how total memory
230262306a36Sopenharmony_ci * related to pages that could be allocated for buffers.
230362306a36Sopenharmony_ci *
230462306a36Sopenharmony_ci * However, that was when we used "dirty_ratio" to scale with
230562306a36Sopenharmony_ci * all memory, and we don't do that any more. "dirty_ratio"
230662306a36Sopenharmony_ci * is now applied to total non-HIGHPAGE memory, and as such we can't
230762306a36Sopenharmony_ci * get into the old insane situation any more where we had
230862306a36Sopenharmony_ci * large amounts of dirty pages compared to a small amount of
230962306a36Sopenharmony_ci * non-HIGHMEM memory.
231062306a36Sopenharmony_ci *
231162306a36Sopenharmony_ci * But we might still want to scale the dirty_ratio by how
231262306a36Sopenharmony_ci * much memory the box has..
231362306a36Sopenharmony_ci */
231462306a36Sopenharmony_civoid __init page_writeback_init(void)
231562306a36Sopenharmony_ci{
231662306a36Sopenharmony_ci	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
231962306a36Sopenharmony_ci			  page_writeback_cpu_online, NULL);
232062306a36Sopenharmony_ci	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
232162306a36Sopenharmony_ci			  page_writeback_cpu_online);
232262306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
232362306a36Sopenharmony_ci	register_sysctl_init("vm", vm_page_writeback_sysctls);
232462306a36Sopenharmony_ci#endif
232562306a36Sopenharmony_ci}
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci/**
232862306a36Sopenharmony_ci * tag_pages_for_writeback - tag pages to be written by write_cache_pages
232962306a36Sopenharmony_ci * @mapping: address space structure to write
233062306a36Sopenharmony_ci * @start: starting page index
233162306a36Sopenharmony_ci * @end: ending page index (inclusive)
233262306a36Sopenharmony_ci *
233362306a36Sopenharmony_ci * This function scans the page range from @start to @end (inclusive) and tags
233462306a36Sopenharmony_ci * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
233562306a36Sopenharmony_ci * that write_cache_pages (or whoever calls this function) will then use
233662306a36Sopenharmony_ci * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
233762306a36Sopenharmony_ci * used to avoid livelocking of writeback by a process steadily creating new
233862306a36Sopenharmony_ci * dirty pages in the file (thus it is important for this function to be quick
233962306a36Sopenharmony_ci * so that it can tag pages faster than a dirtying process can create them).
234062306a36Sopenharmony_ci */
234162306a36Sopenharmony_civoid tag_pages_for_writeback(struct address_space *mapping,
234262306a36Sopenharmony_ci			     pgoff_t start, pgoff_t end)
234362306a36Sopenharmony_ci{
234462306a36Sopenharmony_ci	XA_STATE(xas, &mapping->i_pages, start);
234562306a36Sopenharmony_ci	unsigned int tagged = 0;
234662306a36Sopenharmony_ci	void *page;
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_ci	xas_lock_irq(&xas);
234962306a36Sopenharmony_ci	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
235062306a36Sopenharmony_ci		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
235162306a36Sopenharmony_ci		if (++tagged % XA_CHECK_SCHED)
235262306a36Sopenharmony_ci			continue;
235362306a36Sopenharmony_ci
235462306a36Sopenharmony_ci		xas_pause(&xas);
235562306a36Sopenharmony_ci		xas_unlock_irq(&xas);
235662306a36Sopenharmony_ci		cond_resched();
235762306a36Sopenharmony_ci		xas_lock_irq(&xas);
235862306a36Sopenharmony_ci	}
235962306a36Sopenharmony_ci	xas_unlock_irq(&xas);
236062306a36Sopenharmony_ci}
236162306a36Sopenharmony_ciEXPORT_SYMBOL(tag_pages_for_writeback);
236262306a36Sopenharmony_ci
236362306a36Sopenharmony_ci/**
236462306a36Sopenharmony_ci * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
236562306a36Sopenharmony_ci * @mapping: address space structure to write
236662306a36Sopenharmony_ci * @wbc: subtract the number of written pages from *@wbc->nr_to_write
236762306a36Sopenharmony_ci * @writepage: function called for each page
236862306a36Sopenharmony_ci * @data: data passed to writepage function
236962306a36Sopenharmony_ci *
237062306a36Sopenharmony_ci * If a page is already under I/O, write_cache_pages() skips it, even
237162306a36Sopenharmony_ci * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
237262306a36Sopenharmony_ci * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
237362306a36Sopenharmony_ci * and msync() need to guarantee that all the data which was dirty at the time
237462306a36Sopenharmony_ci * the call was made get new I/O started against them.  If wbc->sync_mode is
237562306a36Sopenharmony_ci * WB_SYNC_ALL then we were called for data integrity and we must wait for
237662306a36Sopenharmony_ci * existing IO to complete.
237762306a36Sopenharmony_ci *
237862306a36Sopenharmony_ci * To avoid livelocks (when other process dirties new pages), we first tag
237962306a36Sopenharmony_ci * pages which should be written back with TOWRITE tag and only then start
238062306a36Sopenharmony_ci * writing them. For data-integrity sync we have to be careful so that we do
238162306a36Sopenharmony_ci * not miss some pages (e.g., because some other process has cleared TOWRITE
238262306a36Sopenharmony_ci * tag we set). The rule we follow is that TOWRITE tag can be cleared only
238362306a36Sopenharmony_ci * by the process clearing the DIRTY tag (and submitting the page for IO).
238462306a36Sopenharmony_ci *
238562306a36Sopenharmony_ci * To avoid deadlocks between range_cyclic writeback and callers that hold
238662306a36Sopenharmony_ci * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
238762306a36Sopenharmony_ci * we do not loop back to the start of the file. Doing so causes a page
238862306a36Sopenharmony_ci * lock/page writeback access order inversion - we should only ever lock
238962306a36Sopenharmony_ci * multiple pages in ascending page->index order, and looping back to the start
239062306a36Sopenharmony_ci * of the file violates that rule and causes deadlocks.
239162306a36Sopenharmony_ci *
239262306a36Sopenharmony_ci * Return: %0 on success, negative error code otherwise
239362306a36Sopenharmony_ci */
239462306a36Sopenharmony_ciint write_cache_pages(struct address_space *mapping,
239562306a36Sopenharmony_ci		      struct writeback_control *wbc, writepage_t writepage,
239662306a36Sopenharmony_ci		      void *data)
239762306a36Sopenharmony_ci{
239862306a36Sopenharmony_ci	int ret = 0;
239962306a36Sopenharmony_ci	int done = 0;
240062306a36Sopenharmony_ci	int error;
240162306a36Sopenharmony_ci	struct folio_batch fbatch;
240262306a36Sopenharmony_ci	int nr_folios;
240362306a36Sopenharmony_ci	pgoff_t index;
240462306a36Sopenharmony_ci	pgoff_t end;		/* Inclusive */
240562306a36Sopenharmony_ci	pgoff_t done_index;
240662306a36Sopenharmony_ci	int range_whole = 0;
240762306a36Sopenharmony_ci	xa_mark_t tag;
240862306a36Sopenharmony_ci
240962306a36Sopenharmony_ci	folio_batch_init(&fbatch);
241062306a36Sopenharmony_ci	if (wbc->range_cyclic) {
241162306a36Sopenharmony_ci		index = mapping->writeback_index; /* prev offset */
241262306a36Sopenharmony_ci		end = -1;
241362306a36Sopenharmony_ci	} else {
241462306a36Sopenharmony_ci		index = wbc->range_start >> PAGE_SHIFT;
241562306a36Sopenharmony_ci		end = wbc->range_end >> PAGE_SHIFT;
241662306a36Sopenharmony_ci		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
241762306a36Sopenharmony_ci			range_whole = 1;
241862306a36Sopenharmony_ci	}
241962306a36Sopenharmony_ci	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
242062306a36Sopenharmony_ci		tag_pages_for_writeback(mapping, index, end);
242162306a36Sopenharmony_ci		tag = PAGECACHE_TAG_TOWRITE;
242262306a36Sopenharmony_ci	} else {
242362306a36Sopenharmony_ci		tag = PAGECACHE_TAG_DIRTY;
242462306a36Sopenharmony_ci	}
242562306a36Sopenharmony_ci	done_index = index;
242662306a36Sopenharmony_ci	while (!done && (index <= end)) {
242762306a36Sopenharmony_ci		int i;
242862306a36Sopenharmony_ci
242962306a36Sopenharmony_ci		nr_folios = filemap_get_folios_tag(mapping, &index, end,
243062306a36Sopenharmony_ci				tag, &fbatch);
243162306a36Sopenharmony_ci
243262306a36Sopenharmony_ci		if (nr_folios == 0)
243362306a36Sopenharmony_ci			break;
243462306a36Sopenharmony_ci
243562306a36Sopenharmony_ci		for (i = 0; i < nr_folios; i++) {
243662306a36Sopenharmony_ci			struct folio *folio = fbatch.folios[i];
243762306a36Sopenharmony_ci			unsigned long nr;
243862306a36Sopenharmony_ci
243962306a36Sopenharmony_ci			done_index = folio->index;
244062306a36Sopenharmony_ci
244162306a36Sopenharmony_ci			folio_lock(folio);
244262306a36Sopenharmony_ci
244362306a36Sopenharmony_ci			/*
244462306a36Sopenharmony_ci			 * Page truncated or invalidated. We can freely skip it
244562306a36Sopenharmony_ci			 * then, even for data integrity operations: the page
244662306a36Sopenharmony_ci			 * has disappeared concurrently, so there could be no
244762306a36Sopenharmony_ci			 * real expectation of this data integrity operation
244862306a36Sopenharmony_ci			 * even if there is now a new, dirty page at the same
244962306a36Sopenharmony_ci			 * pagecache address.
245062306a36Sopenharmony_ci			 */
245162306a36Sopenharmony_ci			if (unlikely(folio->mapping != mapping)) {
245262306a36Sopenharmony_cicontinue_unlock:
245362306a36Sopenharmony_ci				folio_unlock(folio);
245462306a36Sopenharmony_ci				continue;
245562306a36Sopenharmony_ci			}
245662306a36Sopenharmony_ci
245762306a36Sopenharmony_ci			if (!folio_test_dirty(folio)) {
245862306a36Sopenharmony_ci				/* someone wrote it for us */
245962306a36Sopenharmony_ci				goto continue_unlock;
246062306a36Sopenharmony_ci			}
246162306a36Sopenharmony_ci
246262306a36Sopenharmony_ci			if (folio_test_writeback(folio)) {
246362306a36Sopenharmony_ci				if (wbc->sync_mode != WB_SYNC_NONE)
246462306a36Sopenharmony_ci					folio_wait_writeback(folio);
246562306a36Sopenharmony_ci				else
246662306a36Sopenharmony_ci					goto continue_unlock;
246762306a36Sopenharmony_ci			}
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_ci			BUG_ON(folio_test_writeback(folio));
247062306a36Sopenharmony_ci			if (!folio_clear_dirty_for_io(folio))
247162306a36Sopenharmony_ci				goto continue_unlock;
247262306a36Sopenharmony_ci
247362306a36Sopenharmony_ci			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
247462306a36Sopenharmony_ci			error = writepage(folio, wbc, data);
247562306a36Sopenharmony_ci			nr = folio_nr_pages(folio);
247662306a36Sopenharmony_ci			if (unlikely(error)) {
247762306a36Sopenharmony_ci				/*
247862306a36Sopenharmony_ci				 * Handle errors according to the type of
247962306a36Sopenharmony_ci				 * writeback. There's no need to continue for
248062306a36Sopenharmony_ci				 * background writeback. Just push done_index
248162306a36Sopenharmony_ci				 * past this page so media errors won't choke
248262306a36Sopenharmony_ci				 * writeout for the entire file. For integrity
248362306a36Sopenharmony_ci				 * writeback, we must process the entire dirty
248462306a36Sopenharmony_ci				 * set regardless of errors because the fs may
248562306a36Sopenharmony_ci				 * still have state to clear for each page. In
248662306a36Sopenharmony_ci				 * that case we continue processing and return
248762306a36Sopenharmony_ci				 * the first error.
248862306a36Sopenharmony_ci				 */
248962306a36Sopenharmony_ci				if (error == AOP_WRITEPAGE_ACTIVATE) {
249062306a36Sopenharmony_ci					folio_unlock(folio);
249162306a36Sopenharmony_ci					error = 0;
249262306a36Sopenharmony_ci				} else if (wbc->sync_mode != WB_SYNC_ALL) {
249362306a36Sopenharmony_ci					ret = error;
249462306a36Sopenharmony_ci					done_index = folio->index + nr;
249562306a36Sopenharmony_ci					done = 1;
249662306a36Sopenharmony_ci					break;
249762306a36Sopenharmony_ci				}
249862306a36Sopenharmony_ci				if (!ret)
249962306a36Sopenharmony_ci					ret = error;
250062306a36Sopenharmony_ci			}
250162306a36Sopenharmony_ci
250262306a36Sopenharmony_ci			/*
250362306a36Sopenharmony_ci			 * We stop writing back only if we are not doing
250462306a36Sopenharmony_ci			 * integrity sync. In case of integrity sync we have to
250562306a36Sopenharmony_ci			 * keep going until we have written all the pages
250662306a36Sopenharmony_ci			 * we tagged for writeback prior to entering this loop.
250762306a36Sopenharmony_ci			 */
250862306a36Sopenharmony_ci			wbc->nr_to_write -= nr;
250962306a36Sopenharmony_ci			if (wbc->nr_to_write <= 0 &&
251062306a36Sopenharmony_ci			    wbc->sync_mode == WB_SYNC_NONE) {
251162306a36Sopenharmony_ci				done = 1;
251262306a36Sopenharmony_ci				break;
251362306a36Sopenharmony_ci			}
251462306a36Sopenharmony_ci		}
251562306a36Sopenharmony_ci		folio_batch_release(&fbatch);
251662306a36Sopenharmony_ci		cond_resched();
251762306a36Sopenharmony_ci	}
251862306a36Sopenharmony_ci
251962306a36Sopenharmony_ci	/*
252062306a36Sopenharmony_ci	 * If we hit the last page and there is more work to be done: wrap
252162306a36Sopenharmony_ci	 * back the index back to the start of the file for the next
252262306a36Sopenharmony_ci	 * time we are called.
252362306a36Sopenharmony_ci	 */
252462306a36Sopenharmony_ci	if (wbc->range_cyclic && !done)
252562306a36Sopenharmony_ci		done_index = 0;
252662306a36Sopenharmony_ci	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
252762306a36Sopenharmony_ci		mapping->writeback_index = done_index;
252862306a36Sopenharmony_ci
252962306a36Sopenharmony_ci	return ret;
253062306a36Sopenharmony_ci}
253162306a36Sopenharmony_ciEXPORT_SYMBOL(write_cache_pages);
253262306a36Sopenharmony_ci
253362306a36Sopenharmony_cistatic int writepage_cb(struct folio *folio, struct writeback_control *wbc,
253462306a36Sopenharmony_ci		void *data)
253562306a36Sopenharmony_ci{
253662306a36Sopenharmony_ci	struct address_space *mapping = data;
253762306a36Sopenharmony_ci	int ret = mapping->a_ops->writepage(&folio->page, wbc);
253862306a36Sopenharmony_ci	mapping_set_error(mapping, ret);
253962306a36Sopenharmony_ci	return ret;
254062306a36Sopenharmony_ci}
254162306a36Sopenharmony_ci
254262306a36Sopenharmony_ciint do_writepages(struct address_space *mapping, struct writeback_control *wbc)
254362306a36Sopenharmony_ci{
254462306a36Sopenharmony_ci	int ret;
254562306a36Sopenharmony_ci	struct bdi_writeback *wb;
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci	if (wbc->nr_to_write <= 0)
254862306a36Sopenharmony_ci		return 0;
254962306a36Sopenharmony_ci	wb = inode_to_wb_wbc(mapping->host, wbc);
255062306a36Sopenharmony_ci	wb_bandwidth_estimate_start(wb);
255162306a36Sopenharmony_ci	while (1) {
255262306a36Sopenharmony_ci		if (mapping->a_ops->writepages) {
255362306a36Sopenharmony_ci			ret = mapping->a_ops->writepages(mapping, wbc);
255462306a36Sopenharmony_ci		} else if (mapping->a_ops->writepage) {
255562306a36Sopenharmony_ci			struct blk_plug plug;
255662306a36Sopenharmony_ci
255762306a36Sopenharmony_ci			blk_start_plug(&plug);
255862306a36Sopenharmony_ci			ret = write_cache_pages(mapping, wbc, writepage_cb,
255962306a36Sopenharmony_ci						mapping);
256062306a36Sopenharmony_ci			blk_finish_plug(&plug);
256162306a36Sopenharmony_ci		} else {
256262306a36Sopenharmony_ci			/* deal with chardevs and other special files */
256362306a36Sopenharmony_ci			ret = 0;
256462306a36Sopenharmony_ci		}
256562306a36Sopenharmony_ci		if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL)
256662306a36Sopenharmony_ci			break;
256762306a36Sopenharmony_ci
256862306a36Sopenharmony_ci		/*
256962306a36Sopenharmony_ci		 * Lacking an allocation context or the locality or writeback
257062306a36Sopenharmony_ci		 * state of any of the inode's pages, throttle based on
257162306a36Sopenharmony_ci		 * writeback activity on the local node. It's as good a
257262306a36Sopenharmony_ci		 * guess as any.
257362306a36Sopenharmony_ci		 */
257462306a36Sopenharmony_ci		reclaim_throttle(NODE_DATA(numa_node_id()),
257562306a36Sopenharmony_ci			VMSCAN_THROTTLE_WRITEBACK);
257662306a36Sopenharmony_ci	}
257762306a36Sopenharmony_ci	/*
257862306a36Sopenharmony_ci	 * Usually few pages are written by now from those we've just submitted
257962306a36Sopenharmony_ci	 * but if there's constant writeback being submitted, this makes sure
258062306a36Sopenharmony_ci	 * writeback bandwidth is updated once in a while.
258162306a36Sopenharmony_ci	 */
258262306a36Sopenharmony_ci	if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
258362306a36Sopenharmony_ci				   BANDWIDTH_INTERVAL))
258462306a36Sopenharmony_ci		wb_update_bandwidth(wb);
258562306a36Sopenharmony_ci	return ret;
258662306a36Sopenharmony_ci}
258762306a36Sopenharmony_ci
258862306a36Sopenharmony_ci/*
258962306a36Sopenharmony_ci * For address_spaces which do not use buffers nor write back.
259062306a36Sopenharmony_ci */
259162306a36Sopenharmony_cibool noop_dirty_folio(struct address_space *mapping, struct folio *folio)
259262306a36Sopenharmony_ci{
259362306a36Sopenharmony_ci	if (!folio_test_dirty(folio))
259462306a36Sopenharmony_ci		return !folio_test_set_dirty(folio);
259562306a36Sopenharmony_ci	return false;
259662306a36Sopenharmony_ci}
259762306a36Sopenharmony_ciEXPORT_SYMBOL(noop_dirty_folio);
259862306a36Sopenharmony_ci
259962306a36Sopenharmony_ci/*
260062306a36Sopenharmony_ci * Helper function for set_page_dirty family.
260162306a36Sopenharmony_ci *
260262306a36Sopenharmony_ci * Caller must hold folio_memcg_lock().
260362306a36Sopenharmony_ci *
260462306a36Sopenharmony_ci * NOTE: This relies on being atomic wrt interrupts.
260562306a36Sopenharmony_ci */
260662306a36Sopenharmony_cistatic void folio_account_dirtied(struct folio *folio,
260762306a36Sopenharmony_ci		struct address_space *mapping)
260862306a36Sopenharmony_ci{
260962306a36Sopenharmony_ci	struct inode *inode = mapping->host;
261062306a36Sopenharmony_ci
261162306a36Sopenharmony_ci	trace_writeback_dirty_folio(folio, mapping);
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	if (mapping_can_writeback(mapping)) {
261462306a36Sopenharmony_ci		struct bdi_writeback *wb;
261562306a36Sopenharmony_ci		long nr = folio_nr_pages(folio);
261662306a36Sopenharmony_ci
261762306a36Sopenharmony_ci		inode_attach_wb(inode, folio);
261862306a36Sopenharmony_ci		wb = inode_to_wb(inode);
261962306a36Sopenharmony_ci
262062306a36Sopenharmony_ci		__lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr);
262162306a36Sopenharmony_ci		__zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
262262306a36Sopenharmony_ci		__node_stat_mod_folio(folio, NR_DIRTIED, nr);
262362306a36Sopenharmony_ci		wb_stat_mod(wb, WB_RECLAIMABLE, nr);
262462306a36Sopenharmony_ci		wb_stat_mod(wb, WB_DIRTIED, nr);
262562306a36Sopenharmony_ci		task_io_account_write(nr * PAGE_SIZE);
262662306a36Sopenharmony_ci		current->nr_dirtied += nr;
262762306a36Sopenharmony_ci		__this_cpu_add(bdp_ratelimits, nr);
262862306a36Sopenharmony_ci
262962306a36Sopenharmony_ci		mem_cgroup_track_foreign_dirty(folio, wb);
263062306a36Sopenharmony_ci	}
263162306a36Sopenharmony_ci}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci/*
263462306a36Sopenharmony_ci * Helper function for deaccounting dirty page without writeback.
263562306a36Sopenharmony_ci *
263662306a36Sopenharmony_ci * Caller must hold folio_memcg_lock().
263762306a36Sopenharmony_ci */
263862306a36Sopenharmony_civoid folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
263962306a36Sopenharmony_ci{
264062306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
264162306a36Sopenharmony_ci
264262306a36Sopenharmony_ci	lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
264362306a36Sopenharmony_ci	zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
264462306a36Sopenharmony_ci	wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
264562306a36Sopenharmony_ci	task_io_account_cancelled_write(nr * PAGE_SIZE);
264662306a36Sopenharmony_ci}
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_ci/*
264962306a36Sopenharmony_ci * Mark the folio dirty, and set it dirty in the page cache, and mark
265062306a36Sopenharmony_ci * the inode dirty.
265162306a36Sopenharmony_ci *
265262306a36Sopenharmony_ci * If warn is true, then emit a warning if the folio is not uptodate and has
265362306a36Sopenharmony_ci * not been truncated.
265462306a36Sopenharmony_ci *
265562306a36Sopenharmony_ci * The caller must hold folio_memcg_lock().  Most callers have the folio
265662306a36Sopenharmony_ci * locked.  A few have the folio blocked from truncation through other
265762306a36Sopenharmony_ci * means (eg zap_vma_pages() has it mapped and is holding the page table
265862306a36Sopenharmony_ci * lock).  This can also be called from mark_buffer_dirty(), which I
265962306a36Sopenharmony_ci * cannot prove is always protected against truncate.
266062306a36Sopenharmony_ci */
266162306a36Sopenharmony_civoid __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
266262306a36Sopenharmony_ci			     int warn)
266362306a36Sopenharmony_ci{
266462306a36Sopenharmony_ci	unsigned long flags;
266562306a36Sopenharmony_ci
266662306a36Sopenharmony_ci	xa_lock_irqsave(&mapping->i_pages, flags);
266762306a36Sopenharmony_ci	if (folio->mapping) {	/* Race with truncate? */
266862306a36Sopenharmony_ci		WARN_ON_ONCE(warn && !folio_test_uptodate(folio));
266962306a36Sopenharmony_ci		folio_account_dirtied(folio, mapping);
267062306a36Sopenharmony_ci		__xa_set_mark(&mapping->i_pages, folio_index(folio),
267162306a36Sopenharmony_ci				PAGECACHE_TAG_DIRTY);
267262306a36Sopenharmony_ci	}
267362306a36Sopenharmony_ci	xa_unlock_irqrestore(&mapping->i_pages, flags);
267462306a36Sopenharmony_ci}
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ci/**
267762306a36Sopenharmony_ci * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
267862306a36Sopenharmony_ci * @mapping: Address space this folio belongs to.
267962306a36Sopenharmony_ci * @folio: Folio to be marked as dirty.
268062306a36Sopenharmony_ci *
268162306a36Sopenharmony_ci * Filesystems which do not use buffer heads should call this function
268262306a36Sopenharmony_ci * from their set_page_dirty address space operation.  It ignores the
268362306a36Sopenharmony_ci * contents of folio_get_private(), so if the filesystem marks individual
268462306a36Sopenharmony_ci * blocks as dirty, the filesystem should handle that itself.
268562306a36Sopenharmony_ci *
268662306a36Sopenharmony_ci * This is also sometimes used by filesystems which use buffer_heads when
268762306a36Sopenharmony_ci * a single buffer is being dirtied: we want to set the folio dirty in
268862306a36Sopenharmony_ci * that case, but not all the buffers.  This is a "bottom-up" dirtying,
268962306a36Sopenharmony_ci * whereas block_dirty_folio() is a "top-down" dirtying.
269062306a36Sopenharmony_ci *
269162306a36Sopenharmony_ci * The caller must ensure this doesn't race with truncation.  Most will
269262306a36Sopenharmony_ci * simply hold the folio lock, but e.g. zap_pte_range() calls with the
269362306a36Sopenharmony_ci * folio mapped and the pte lock held, which also locks out truncation.
269462306a36Sopenharmony_ci */
269562306a36Sopenharmony_cibool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
269662306a36Sopenharmony_ci{
269762306a36Sopenharmony_ci	folio_memcg_lock(folio);
269862306a36Sopenharmony_ci	if (folio_test_set_dirty(folio)) {
269962306a36Sopenharmony_ci		folio_memcg_unlock(folio);
270062306a36Sopenharmony_ci		return false;
270162306a36Sopenharmony_ci	}
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	__folio_mark_dirty(folio, mapping, !folio_test_private(folio));
270462306a36Sopenharmony_ci	folio_memcg_unlock(folio);
270562306a36Sopenharmony_ci
270662306a36Sopenharmony_ci	if (mapping->host) {
270762306a36Sopenharmony_ci		/* !PageAnon && !swapper_space */
270862306a36Sopenharmony_ci		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
270962306a36Sopenharmony_ci	}
271062306a36Sopenharmony_ci	return true;
271162306a36Sopenharmony_ci}
271262306a36Sopenharmony_ciEXPORT_SYMBOL(filemap_dirty_folio);
271362306a36Sopenharmony_ci
271462306a36Sopenharmony_ci/**
271562306a36Sopenharmony_ci * folio_redirty_for_writepage - Decline to write a dirty folio.
271662306a36Sopenharmony_ci * @wbc: The writeback control.
271762306a36Sopenharmony_ci * @folio: The folio.
271862306a36Sopenharmony_ci *
271962306a36Sopenharmony_ci * When a writepage implementation decides that it doesn't want to write
272062306a36Sopenharmony_ci * @folio for some reason, it should call this function, unlock @folio and
272162306a36Sopenharmony_ci * return 0.
272262306a36Sopenharmony_ci *
272362306a36Sopenharmony_ci * Return: True if we redirtied the folio.  False if someone else dirtied
272462306a36Sopenharmony_ci * it first.
272562306a36Sopenharmony_ci */
272662306a36Sopenharmony_cibool folio_redirty_for_writepage(struct writeback_control *wbc,
272762306a36Sopenharmony_ci		struct folio *folio)
272862306a36Sopenharmony_ci{
272962306a36Sopenharmony_ci	struct address_space *mapping = folio->mapping;
273062306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
273162306a36Sopenharmony_ci	bool ret;
273262306a36Sopenharmony_ci
273362306a36Sopenharmony_ci	wbc->pages_skipped += nr;
273462306a36Sopenharmony_ci	ret = filemap_dirty_folio(mapping, folio);
273562306a36Sopenharmony_ci	if (mapping && mapping_can_writeback(mapping)) {
273662306a36Sopenharmony_ci		struct inode *inode = mapping->host;
273762306a36Sopenharmony_ci		struct bdi_writeback *wb;
273862306a36Sopenharmony_ci		struct wb_lock_cookie cookie = {};
273962306a36Sopenharmony_ci
274062306a36Sopenharmony_ci		wb = unlocked_inode_to_wb_begin(inode, &cookie);
274162306a36Sopenharmony_ci		current->nr_dirtied -= nr;
274262306a36Sopenharmony_ci		node_stat_mod_folio(folio, NR_DIRTIED, -nr);
274362306a36Sopenharmony_ci		wb_stat_mod(wb, WB_DIRTIED, -nr);
274462306a36Sopenharmony_ci		unlocked_inode_to_wb_end(inode, &cookie);
274562306a36Sopenharmony_ci	}
274662306a36Sopenharmony_ci	return ret;
274762306a36Sopenharmony_ci}
274862306a36Sopenharmony_ciEXPORT_SYMBOL(folio_redirty_for_writepage);
274962306a36Sopenharmony_ci
275062306a36Sopenharmony_ci/**
275162306a36Sopenharmony_ci * folio_mark_dirty - Mark a folio as being modified.
275262306a36Sopenharmony_ci * @folio: The folio.
275362306a36Sopenharmony_ci *
275462306a36Sopenharmony_ci * The folio may not be truncated while this function is running.
275562306a36Sopenharmony_ci * Holding the folio lock is sufficient to prevent truncation, but some
275662306a36Sopenharmony_ci * callers cannot acquire a sleeping lock.  These callers instead hold
275762306a36Sopenharmony_ci * the page table lock for a page table which contains at least one page
275862306a36Sopenharmony_ci * in this folio.  Truncation will block on the page table lock as it
275962306a36Sopenharmony_ci * unmaps pages before removing the folio from its mapping.
276062306a36Sopenharmony_ci *
276162306a36Sopenharmony_ci * Return: True if the folio was newly dirtied, false if it was already dirty.
276262306a36Sopenharmony_ci */
276362306a36Sopenharmony_cibool folio_mark_dirty(struct folio *folio)
276462306a36Sopenharmony_ci{
276562306a36Sopenharmony_ci	struct address_space *mapping = folio_mapping(folio);
276662306a36Sopenharmony_ci
276762306a36Sopenharmony_ci	if (likely(mapping)) {
276862306a36Sopenharmony_ci		/*
276962306a36Sopenharmony_ci		 * readahead/folio_deactivate could remain
277062306a36Sopenharmony_ci		 * PG_readahead/PG_reclaim due to race with folio_end_writeback
277162306a36Sopenharmony_ci		 * About readahead, if the folio is written, the flags would be
277262306a36Sopenharmony_ci		 * reset. So no problem.
277362306a36Sopenharmony_ci		 * About folio_deactivate, if the folio is redirtied,
277462306a36Sopenharmony_ci		 * the flag will be reset. So no problem. but if the
277562306a36Sopenharmony_ci		 * folio is used by readahead it will confuse readahead
277662306a36Sopenharmony_ci		 * and make it restart the size rampup process. But it's
277762306a36Sopenharmony_ci		 * a trivial problem.
277862306a36Sopenharmony_ci		 */
277962306a36Sopenharmony_ci		if (folio_test_reclaim(folio))
278062306a36Sopenharmony_ci			folio_clear_reclaim(folio);
278162306a36Sopenharmony_ci		return mapping->a_ops->dirty_folio(mapping, folio);
278262306a36Sopenharmony_ci	}
278362306a36Sopenharmony_ci
278462306a36Sopenharmony_ci	return noop_dirty_folio(mapping, folio);
278562306a36Sopenharmony_ci}
278662306a36Sopenharmony_ciEXPORT_SYMBOL(folio_mark_dirty);
278762306a36Sopenharmony_ci
/*
 * Dirty @page while holding its page lock.
 *
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty();
 * this helper takes and releases the lock around the call and passes the
 * result of set_page_dirty() back to the caller.
 */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	lock_page(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);

	return dirtied;
}
EXPORT_SYMBOL(set_page_dirty_lock);
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci/*
281062306a36Sopenharmony_ci * This cancels just the dirty bit on the kernel page itself, it does NOT
281162306a36Sopenharmony_ci * actually remove dirty bits on any mmap's that may be around. It also
281262306a36Sopenharmony_ci * leaves the page tagged dirty, so any sync activity will still find it on
281362306a36Sopenharmony_ci * the dirty lists, and in particular, clear_page_dirty_for_io() will still
281462306a36Sopenharmony_ci * look at the dirty bits in the VM.
281562306a36Sopenharmony_ci *
281662306a36Sopenharmony_ci * Doing this should *normally* only ever be done when a page is truncated,
281762306a36Sopenharmony_ci * and is not actually mapped anywhere at all. However, fs/buffer.c does
281862306a36Sopenharmony_ci * this when it notices that somebody has cleaned out all the buffers on a
281962306a36Sopenharmony_ci * page without actually doing it through the VM. Can you say "ext3 is
282062306a36Sopenharmony_ci * horribly ugly"? Thought you could.
282162306a36Sopenharmony_ci */
282262306a36Sopenharmony_civoid __folio_cancel_dirty(struct folio *folio)
282362306a36Sopenharmony_ci{
282462306a36Sopenharmony_ci	struct address_space *mapping = folio_mapping(folio);
282562306a36Sopenharmony_ci
282662306a36Sopenharmony_ci	if (mapping_can_writeback(mapping)) {
282762306a36Sopenharmony_ci		struct inode *inode = mapping->host;
282862306a36Sopenharmony_ci		struct bdi_writeback *wb;
282962306a36Sopenharmony_ci		struct wb_lock_cookie cookie = {};
283062306a36Sopenharmony_ci
283162306a36Sopenharmony_ci		folio_memcg_lock(folio);
283262306a36Sopenharmony_ci		wb = unlocked_inode_to_wb_begin(inode, &cookie);
283362306a36Sopenharmony_ci
283462306a36Sopenharmony_ci		if (folio_test_clear_dirty(folio))
283562306a36Sopenharmony_ci			folio_account_cleaned(folio, wb);
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci		unlocked_inode_to_wb_end(inode, &cookie);
283862306a36Sopenharmony_ci		folio_memcg_unlock(folio);
283962306a36Sopenharmony_ci	} else {
284062306a36Sopenharmony_ci		folio_clear_dirty(folio);
284162306a36Sopenharmony_ci	}
284262306a36Sopenharmony_ci}
284362306a36Sopenharmony_ciEXPORT_SYMBOL(__folio_cancel_dirty);
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_ci/*
284662306a36Sopenharmony_ci * Clear a folio's dirty flag, while caring for dirty memory accounting.
284762306a36Sopenharmony_ci * Returns true if the folio was previously dirty.
284862306a36Sopenharmony_ci *
284962306a36Sopenharmony_ci * This is for preparing to put the folio under writeout.  We leave
285062306a36Sopenharmony_ci * the folio tagged as dirty in the xarray so that a concurrent
285162306a36Sopenharmony_ci * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk.
285262306a36Sopenharmony_ci * The ->writepage implementation will run either folio_start_writeback()
285362306a36Sopenharmony_ci * or folio_mark_dirty(), at which stage we bring the folio's dirty flag
285462306a36Sopenharmony_ci * and xarray dirty tag back into sync.
285562306a36Sopenharmony_ci *
285662306a36Sopenharmony_ci * This incoherency between the folio's dirty flag and xarray tag is
285762306a36Sopenharmony_ci * unfortunate, but it only exists while the folio is locked.
285862306a36Sopenharmony_ci */
285962306a36Sopenharmony_cibool folio_clear_dirty_for_io(struct folio *folio)
286062306a36Sopenharmony_ci{
286162306a36Sopenharmony_ci	struct address_space *mapping = folio_mapping(folio);
286262306a36Sopenharmony_ci	bool ret = false;
286362306a36Sopenharmony_ci
286462306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
286562306a36Sopenharmony_ci
286662306a36Sopenharmony_ci	if (mapping && mapping_can_writeback(mapping)) {
286762306a36Sopenharmony_ci		struct inode *inode = mapping->host;
286862306a36Sopenharmony_ci		struct bdi_writeback *wb;
286962306a36Sopenharmony_ci		struct wb_lock_cookie cookie = {};
287062306a36Sopenharmony_ci
287162306a36Sopenharmony_ci		/*
287262306a36Sopenharmony_ci		 * Yes, Virginia, this is indeed insane.
287362306a36Sopenharmony_ci		 *
287462306a36Sopenharmony_ci		 * We use this sequence to make sure that
287562306a36Sopenharmony_ci		 *  (a) we account for dirty stats properly
287662306a36Sopenharmony_ci		 *  (b) we tell the low-level filesystem to
287762306a36Sopenharmony_ci		 *      mark the whole folio dirty if it was
287862306a36Sopenharmony_ci		 *      dirty in a pagetable. Only to then
287962306a36Sopenharmony_ci		 *  (c) clean the folio again and return 1 to
288062306a36Sopenharmony_ci		 *      cause the writeback.
288162306a36Sopenharmony_ci		 *
288262306a36Sopenharmony_ci		 * This way we avoid all nasty races with the
288362306a36Sopenharmony_ci		 * dirty bit in multiple places and clearing
288462306a36Sopenharmony_ci		 * them concurrently from different threads.
288562306a36Sopenharmony_ci		 *
288662306a36Sopenharmony_ci		 * Note! Normally the "folio_mark_dirty(folio)"
288762306a36Sopenharmony_ci		 * has no effect on the actual dirty bit - since
288862306a36Sopenharmony_ci		 * that will already usually be set. But we
288962306a36Sopenharmony_ci		 * need the side effects, and it can help us
289062306a36Sopenharmony_ci		 * avoid races.
289162306a36Sopenharmony_ci		 *
289262306a36Sopenharmony_ci		 * We basically use the folio "master dirty bit"
289362306a36Sopenharmony_ci		 * as a serialization point for all the different
289462306a36Sopenharmony_ci		 * threads doing their things.
289562306a36Sopenharmony_ci		 */
289662306a36Sopenharmony_ci		if (folio_mkclean(folio))
289762306a36Sopenharmony_ci			folio_mark_dirty(folio);
289862306a36Sopenharmony_ci		/*
289962306a36Sopenharmony_ci		 * We carefully synchronise fault handlers against
290062306a36Sopenharmony_ci		 * installing a dirty pte and marking the folio dirty
290162306a36Sopenharmony_ci		 * at this point.  We do this by having them hold the
290262306a36Sopenharmony_ci		 * page lock while dirtying the folio, and folios are
290362306a36Sopenharmony_ci		 * always locked coming in here, so we get the desired
290462306a36Sopenharmony_ci		 * exclusion.
290562306a36Sopenharmony_ci		 */
290662306a36Sopenharmony_ci		wb = unlocked_inode_to_wb_begin(inode, &cookie);
290762306a36Sopenharmony_ci		if (folio_test_clear_dirty(folio)) {
290862306a36Sopenharmony_ci			long nr = folio_nr_pages(folio);
290962306a36Sopenharmony_ci			lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
291062306a36Sopenharmony_ci			zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
291162306a36Sopenharmony_ci			wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
291262306a36Sopenharmony_ci			ret = true;
291362306a36Sopenharmony_ci		}
291462306a36Sopenharmony_ci		unlocked_inode_to_wb_end(inode, &cookie);
291562306a36Sopenharmony_ci		return ret;
291662306a36Sopenharmony_ci	}
291762306a36Sopenharmony_ci	return folio_test_clear_dirty(folio);
291862306a36Sopenharmony_ci}
291962306a36Sopenharmony_ciEXPORT_SYMBOL(folio_clear_dirty_for_io);
292062306a36Sopenharmony_ci
292162306a36Sopenharmony_cistatic void wb_inode_writeback_start(struct bdi_writeback *wb)
292262306a36Sopenharmony_ci{
292362306a36Sopenharmony_ci	atomic_inc(&wb->writeback_inodes);
292462306a36Sopenharmony_ci}
292562306a36Sopenharmony_ci
292662306a36Sopenharmony_cistatic void wb_inode_writeback_end(struct bdi_writeback *wb)
292762306a36Sopenharmony_ci{
292862306a36Sopenharmony_ci	unsigned long flags;
292962306a36Sopenharmony_ci	atomic_dec(&wb->writeback_inodes);
293062306a36Sopenharmony_ci	/*
293162306a36Sopenharmony_ci	 * Make sure estimate of writeback throughput gets updated after
293262306a36Sopenharmony_ci	 * writeback completed. We delay the update by BANDWIDTH_INTERVAL
293362306a36Sopenharmony_ci	 * (which is the interval other bandwidth updates use for batching) so
293462306a36Sopenharmony_ci	 * that if multiple inodes end writeback at a similar time, they get
293562306a36Sopenharmony_ci	 * batched into one bandwidth update.
293662306a36Sopenharmony_ci	 */
293762306a36Sopenharmony_ci	spin_lock_irqsave(&wb->work_lock, flags);
293862306a36Sopenharmony_ci	if (test_bit(WB_registered, &wb->state))
293962306a36Sopenharmony_ci		queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
294062306a36Sopenharmony_ci	spin_unlock_irqrestore(&wb->work_lock, flags);
294162306a36Sopenharmony_ci}
294262306a36Sopenharmony_ci
294362306a36Sopenharmony_cibool __folio_end_writeback(struct folio *folio)
294462306a36Sopenharmony_ci{
294562306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
294662306a36Sopenharmony_ci	struct address_space *mapping = folio_mapping(folio);
294762306a36Sopenharmony_ci	bool ret;
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_ci	folio_memcg_lock(folio);
295062306a36Sopenharmony_ci	if (mapping && mapping_use_writeback_tags(mapping)) {
295162306a36Sopenharmony_ci		struct inode *inode = mapping->host;
295262306a36Sopenharmony_ci		struct backing_dev_info *bdi = inode_to_bdi(inode);
295362306a36Sopenharmony_ci		unsigned long flags;
295462306a36Sopenharmony_ci
295562306a36Sopenharmony_ci		xa_lock_irqsave(&mapping->i_pages, flags);
295662306a36Sopenharmony_ci		ret = folio_test_clear_writeback(folio);
295762306a36Sopenharmony_ci		if (ret) {
295862306a36Sopenharmony_ci			__xa_clear_mark(&mapping->i_pages, folio_index(folio),
295962306a36Sopenharmony_ci						PAGECACHE_TAG_WRITEBACK);
296062306a36Sopenharmony_ci			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
296162306a36Sopenharmony_ci				struct bdi_writeback *wb = inode_to_wb(inode);
296262306a36Sopenharmony_ci
296362306a36Sopenharmony_ci				wb_stat_mod(wb, WB_WRITEBACK, -nr);
296462306a36Sopenharmony_ci				__wb_writeout_add(wb, nr);
296562306a36Sopenharmony_ci				if (!mapping_tagged(mapping,
296662306a36Sopenharmony_ci						    PAGECACHE_TAG_WRITEBACK))
296762306a36Sopenharmony_ci					wb_inode_writeback_end(wb);
296862306a36Sopenharmony_ci			}
296962306a36Sopenharmony_ci		}
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_ci		if (mapping->host && !mapping_tagged(mapping,
297262306a36Sopenharmony_ci						     PAGECACHE_TAG_WRITEBACK))
297362306a36Sopenharmony_ci			sb_clear_inode_writeback(mapping->host);
297462306a36Sopenharmony_ci
297562306a36Sopenharmony_ci		xa_unlock_irqrestore(&mapping->i_pages, flags);
297662306a36Sopenharmony_ci	} else {
297762306a36Sopenharmony_ci		ret = folio_test_clear_writeback(folio);
297862306a36Sopenharmony_ci	}
297962306a36Sopenharmony_ci	if (ret) {
298062306a36Sopenharmony_ci		lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
298162306a36Sopenharmony_ci		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
298262306a36Sopenharmony_ci		node_stat_mod_folio(folio, NR_WRITTEN, nr);
298362306a36Sopenharmony_ci	}
298462306a36Sopenharmony_ci	folio_memcg_unlock(folio);
298562306a36Sopenharmony_ci	return ret;
298662306a36Sopenharmony_ci}
298762306a36Sopenharmony_ci
298862306a36Sopenharmony_cibool __folio_start_writeback(struct folio *folio, bool keep_write)
298962306a36Sopenharmony_ci{
299062306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
299162306a36Sopenharmony_ci	struct address_space *mapping = folio_mapping(folio);
299262306a36Sopenharmony_ci	bool ret;
299362306a36Sopenharmony_ci	int access_ret;
299462306a36Sopenharmony_ci
299562306a36Sopenharmony_ci	folio_memcg_lock(folio);
299662306a36Sopenharmony_ci	if (mapping && mapping_use_writeback_tags(mapping)) {
299762306a36Sopenharmony_ci		XA_STATE(xas, &mapping->i_pages, folio_index(folio));
299862306a36Sopenharmony_ci		struct inode *inode = mapping->host;
299962306a36Sopenharmony_ci		struct backing_dev_info *bdi = inode_to_bdi(inode);
300062306a36Sopenharmony_ci		unsigned long flags;
300162306a36Sopenharmony_ci
300262306a36Sopenharmony_ci		xas_lock_irqsave(&xas, flags);
300362306a36Sopenharmony_ci		xas_load(&xas);
300462306a36Sopenharmony_ci		ret = folio_test_set_writeback(folio);
300562306a36Sopenharmony_ci		if (!ret) {
300662306a36Sopenharmony_ci			bool on_wblist;
300762306a36Sopenharmony_ci
300862306a36Sopenharmony_ci			on_wblist = mapping_tagged(mapping,
300962306a36Sopenharmony_ci						   PAGECACHE_TAG_WRITEBACK);
301062306a36Sopenharmony_ci
301162306a36Sopenharmony_ci			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
301262306a36Sopenharmony_ci			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
301362306a36Sopenharmony_ci				struct bdi_writeback *wb = inode_to_wb(inode);
301462306a36Sopenharmony_ci
301562306a36Sopenharmony_ci				wb_stat_mod(wb, WB_WRITEBACK, nr);
301662306a36Sopenharmony_ci				if (!on_wblist)
301762306a36Sopenharmony_ci					wb_inode_writeback_start(wb);
301862306a36Sopenharmony_ci			}
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci			/*
302162306a36Sopenharmony_ci			 * We can come through here when swapping
302262306a36Sopenharmony_ci			 * anonymous folios, so we don't necessarily
302362306a36Sopenharmony_ci			 * have an inode to track for sync.
302462306a36Sopenharmony_ci			 */
302562306a36Sopenharmony_ci			if (mapping->host && !on_wblist)
302662306a36Sopenharmony_ci				sb_mark_inode_writeback(mapping->host);
302762306a36Sopenharmony_ci		}
302862306a36Sopenharmony_ci		if (!folio_test_dirty(folio))
302962306a36Sopenharmony_ci			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
303062306a36Sopenharmony_ci		if (!keep_write)
303162306a36Sopenharmony_ci			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
303262306a36Sopenharmony_ci		xas_unlock_irqrestore(&xas, flags);
303362306a36Sopenharmony_ci	} else {
303462306a36Sopenharmony_ci		ret = folio_test_set_writeback(folio);
303562306a36Sopenharmony_ci	}
303662306a36Sopenharmony_ci	if (!ret) {
303762306a36Sopenharmony_ci		lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr);
303862306a36Sopenharmony_ci		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
303962306a36Sopenharmony_ci	}
304062306a36Sopenharmony_ci	folio_memcg_unlock(folio);
304162306a36Sopenharmony_ci	access_ret = arch_make_folio_accessible(folio);
304262306a36Sopenharmony_ci	/*
304362306a36Sopenharmony_ci	 * If writeback has been triggered on a page that cannot be made
304462306a36Sopenharmony_ci	 * accessible, it is too late to recover here.
304562306a36Sopenharmony_ci	 */
304662306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(access_ret != 0, folio);
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_ci	return ret;
304962306a36Sopenharmony_ci}
305062306a36Sopenharmony_ciEXPORT_SYMBOL(__folio_start_writeback);
305162306a36Sopenharmony_ci
305262306a36Sopenharmony_ci/**
305362306a36Sopenharmony_ci * folio_wait_writeback - Wait for a folio to finish writeback.
305462306a36Sopenharmony_ci * @folio: The folio to wait for.
305562306a36Sopenharmony_ci *
305662306a36Sopenharmony_ci * If the folio is currently being written back to storage, wait for the
305762306a36Sopenharmony_ci * I/O to complete.
305862306a36Sopenharmony_ci *
305962306a36Sopenharmony_ci * Context: Sleeps.  Must be called in process context and with
306062306a36Sopenharmony_ci * no spinlocks held.  Caller should hold a reference on the folio.
306162306a36Sopenharmony_ci * If the folio is not locked, writeback may start again after writeback
306262306a36Sopenharmony_ci * has finished.
306362306a36Sopenharmony_ci */
306462306a36Sopenharmony_civoid folio_wait_writeback(struct folio *folio)
306562306a36Sopenharmony_ci{
306662306a36Sopenharmony_ci	while (folio_test_writeback(folio)) {
306762306a36Sopenharmony_ci		trace_folio_wait_writeback(folio, folio_mapping(folio));
306862306a36Sopenharmony_ci		folio_wait_bit(folio, PG_writeback);
306962306a36Sopenharmony_ci	}
307062306a36Sopenharmony_ci}
307162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(folio_wait_writeback);
307262306a36Sopenharmony_ci
307362306a36Sopenharmony_ci/**
307462306a36Sopenharmony_ci * folio_wait_writeback_killable - Wait for a folio to finish writeback.
307562306a36Sopenharmony_ci * @folio: The folio to wait for.
307662306a36Sopenharmony_ci *
307762306a36Sopenharmony_ci * If the folio is currently being written back to storage, wait for the
307862306a36Sopenharmony_ci * I/O to complete or a fatal signal to arrive.
307962306a36Sopenharmony_ci *
308062306a36Sopenharmony_ci * Context: Sleeps.  Must be called in process context and with
308162306a36Sopenharmony_ci * no spinlocks held.  Caller should hold a reference on the folio.
308262306a36Sopenharmony_ci * If the folio is not locked, writeback may start again after writeback
308362306a36Sopenharmony_ci * has finished.
308462306a36Sopenharmony_ci * Return: 0 on success, -EINTR if we get a fatal signal while waiting.
308562306a36Sopenharmony_ci */
308662306a36Sopenharmony_ciint folio_wait_writeback_killable(struct folio *folio)
308762306a36Sopenharmony_ci{
308862306a36Sopenharmony_ci	while (folio_test_writeback(folio)) {
308962306a36Sopenharmony_ci		trace_folio_wait_writeback(folio, folio_mapping(folio));
309062306a36Sopenharmony_ci		if (folio_wait_bit_killable(folio, PG_writeback))
309162306a36Sopenharmony_ci			return -EINTR;
309262306a36Sopenharmony_ci	}
309362306a36Sopenharmony_ci
309462306a36Sopenharmony_ci	return 0;
309562306a36Sopenharmony_ci}
309662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
309762306a36Sopenharmony_ci
/**
 * folio_wait_stable() - wait for writeback to finish, if necessary.
 * @folio: The folio to wait on.
 *
 * Checks whether the folio belongs to a backing device that needs folio
 * contents to stay stable during writeback.  Only in that case does it wait
 * for any pending writeback to complete; otherwise it returns immediately.
 *
 * Context: Sleeps.  Must be called in process context and with
 * no spinlocks held.  Caller should hold a reference on the folio.
 * If the folio is not locked, writeback may start again after writeback
 * has finished.
 */
void folio_wait_stable(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	if (!mapping_stable_writes(mapping))
		return;

	folio_wait_writeback(folio);
}
EXPORT_SYMBOL_GPL(folio_wait_stable);
3117