// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include <linux/kernel.h>
168c2ecf20Sopenharmony_ci#include <linux/export.h>
178c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
188c2ecf20Sopenharmony_ci#include <linux/fs.h>
198c2ecf20Sopenharmony_ci#include <linux/mm.h>
208c2ecf20Sopenharmony_ci#include <linux/swap.h>
218c2ecf20Sopenharmony_ci#include <linux/slab.h>
228c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
238c2ecf20Sopenharmony_ci#include <linux/writeback.h>
248c2ecf20Sopenharmony_ci#include <linux/init.h>
258c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
268c2ecf20Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
278c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
288c2ecf20Sopenharmony_ci#include <linux/mpage.h>
298c2ecf20Sopenharmony_ci#include <linux/rmap.h>
308c2ecf20Sopenharmony_ci#include <linux/percpu.h>
318c2ecf20Sopenharmony_ci#include <linux/smp.h>
328c2ecf20Sopenharmony_ci#include <linux/sysctl.h>
338c2ecf20Sopenharmony_ci#include <linux/cpu.h>
348c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
358c2ecf20Sopenharmony_ci#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
368c2ecf20Sopenharmony_ci#include <linux/pagevec.h>
378c2ecf20Sopenharmony_ci#include <linux/timer.h>
388c2ecf20Sopenharmony_ci#include <linux/sched/rt.h>
398c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
408c2ecf20Sopenharmony_ci#include <linux/mm_inline.h>
418c2ecf20Sopenharmony_ci#include <trace/events/writeback.h>
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci#include "internal.h"
448c2ecf20Sopenharmony_ci
/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

/* Fixed-point shift used by the dirty ratelimit/pos_ratio calculations. */
#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci/* The following parameters are exported via /proc/sys/vm */
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci/*
728c2ecf20Sopenharmony_ci * Start background writeback (via writeback threads) at this percentage
738c2ecf20Sopenharmony_ci */
748c2ecf20Sopenharmony_ciint dirty_background_ratio = 10;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci/*
778c2ecf20Sopenharmony_ci * dirty_background_bytes starts at 0 (disabled) so that it is a function of
788c2ecf20Sopenharmony_ci * dirty_background_ratio * the amount of dirtyable memory
798c2ecf20Sopenharmony_ci */
808c2ecf20Sopenharmony_ciunsigned long dirty_background_bytes;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci/*
838c2ecf20Sopenharmony_ci * free highmem will not be subtracted from the total free memory
848c2ecf20Sopenharmony_ci * for calculating free ratios if vm_highmem_is_dirtyable is true
858c2ecf20Sopenharmony_ci */
868c2ecf20Sopenharmony_ciint vm_highmem_is_dirtyable;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/*
898c2ecf20Sopenharmony_ci * The generator of dirty data starts writeback at this percentage
908c2ecf20Sopenharmony_ci */
918c2ecf20Sopenharmony_ciint vm_dirty_ratio = 20;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci/*
948c2ecf20Sopenharmony_ci * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
958c2ecf20Sopenharmony_ci * vm_dirty_ratio * the amount of dirtyable memory
968c2ecf20Sopenharmony_ci */
978c2ecf20Sopenharmony_ciunsigned long vm_dirty_bytes;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci/*
1008c2ecf20Sopenharmony_ci * The interval between `kupdate'-style writebacks
1018c2ecf20Sopenharmony_ci */
1028c2ecf20Sopenharmony_ciunsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dirty_writeback_interval);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci/*
1078c2ecf20Sopenharmony_ci * The longest time for which data is allowed to remain dirty
1088c2ecf20Sopenharmony_ci */
1098c2ecf20Sopenharmony_ciunsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci/*
1128c2ecf20Sopenharmony_ci * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
1138c2ecf20Sopenharmony_ci * a full sync is triggered after this time elapses without any disk activity.
1148c2ecf20Sopenharmony_ci */
1158c2ecf20Sopenharmony_ciint laptop_mode;
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ciEXPORT_SYMBOL(laptop_mode);
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci/* End of sysctl-exported parameters */
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_cistruct wb_domain global_wb_domain;
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci/* consolidated parameters for balance_dirty_pages() and its subroutines */
/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;		/* domain the limits apply to */
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;		/* wb being throttled */
	struct fprop_local_percpu *wb_completions; /* wb's writeout fraction */

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;	/* throttle position ratio */
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci#define GDTC_INIT(__wb)		.wb = (__wb),				\
1548c2ecf20Sopenharmony_ci				.dom = &global_wb_domain,		\
1558c2ecf20Sopenharmony_ci				.wb_completions = &(__wb)->completions
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci#define GDTC_INIT_NO_WB		.dom = &global_wb_domain
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
1608c2ecf20Sopenharmony_ci				.dom = mem_cgroup_wb_domain(__wb),	\
1618c2ecf20Sopenharmony_ci				.wb_completions = &(__wb)->memcg_completions, \
1628c2ecf20Sopenharmony_ci				.gdtc = __gdtc
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cistatic bool mdtc_valid(struct dirty_throttle_control *dtc)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	return dtc->dom;
1678c2ecf20Sopenharmony_ci}
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_cistatic struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
1708c2ecf20Sopenharmony_ci{
1718c2ecf20Sopenharmony_ci	return dtc->dom;
1728c2ecf20Sopenharmony_ci}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	return mdtc->gdtc;
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_cistatic struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	return &wb->memcg_completions;
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_cistatic void wb_min_max_ratio(struct bdi_writeback *wb,
1858c2ecf20Sopenharmony_ci			     unsigned long *minp, unsigned long *maxp)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	unsigned long this_bw = wb->avg_write_bandwidth;
1888c2ecf20Sopenharmony_ci	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
1898c2ecf20Sopenharmony_ci	unsigned long long min = wb->bdi->min_ratio;
1908c2ecf20Sopenharmony_ci	unsigned long long max = wb->bdi->max_ratio;
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci	/*
1938c2ecf20Sopenharmony_ci	 * @wb may already be clean by the time control reaches here and
1948c2ecf20Sopenharmony_ci	 * the total may not include its bw.
1958c2ecf20Sopenharmony_ci	 */
1968c2ecf20Sopenharmony_ci	if (this_bw < tot_bw) {
1978c2ecf20Sopenharmony_ci		if (min) {
1988c2ecf20Sopenharmony_ci			min *= this_bw;
1998c2ecf20Sopenharmony_ci			min = div64_ul(min, tot_bw);
2008c2ecf20Sopenharmony_ci		}
2018c2ecf20Sopenharmony_ci		if (max < 100) {
2028c2ecf20Sopenharmony_ci			max *= this_bw;
2038c2ecf20Sopenharmony_ci			max = div64_ul(max, tot_bw);
2048c2ecf20Sopenharmony_ci		}
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	*minp = min;
2088c2ecf20Sopenharmony_ci	*maxp = max;
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci#else	/* CONFIG_CGROUP_WRITEBACK */
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci#define GDTC_INIT(__wb)		.wb = (__wb),                           \
2148c2ecf20Sopenharmony_ci				.wb_completions = &(__wb)->completions
2158c2ecf20Sopenharmony_ci#define GDTC_INIT_NO_WB
2168c2ecf20Sopenharmony_ci#define MDTC_INIT(__wb, __gdtc)
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_cistatic bool mdtc_valid(struct dirty_throttle_control *dtc)
2198c2ecf20Sopenharmony_ci{
2208c2ecf20Sopenharmony_ci	return false;
2218c2ecf20Sopenharmony_ci}
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_cistatic struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	return &global_wb_domain;
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_cistatic struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
2298c2ecf20Sopenharmony_ci{
2308c2ecf20Sopenharmony_ci	return NULL;
2318c2ecf20Sopenharmony_ci}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_cistatic struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	return NULL;
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_cistatic void wb_min_max_ratio(struct bdi_writeback *wb,
2398c2ecf20Sopenharmony_ci			     unsigned long *minp, unsigned long *maxp)
2408c2ecf20Sopenharmony_ci{
2418c2ecf20Sopenharmony_ci	*minp = wb->bdi->min_ratio;
2428c2ecf20Sopenharmony_ci	*maxp = wb->bdi->max_ratio;
2438c2ecf20Sopenharmony_ci}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci#endif	/* CONFIG_CGROUP_WRITEBACK */
2468c2ecf20Sopenharmony_ci
/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value of which the
 * user-configurable dirty ratio is the effective number of pages that
 * are allowed to be actually dirtied.  Per individual zone, or
 * globally by using the sum of dirtyable pages over all zones.
 *
 * Because the user is allowed to specify the dirty limit globally as
 * absolute number of bytes, calculating the per-zone dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci/**
2668c2ecf20Sopenharmony_ci * node_dirtyable_memory - number of dirtyable pages in a node
2678c2ecf20Sopenharmony_ci * @pgdat: the node
2688c2ecf20Sopenharmony_ci *
2698c2ecf20Sopenharmony_ci * Return: the node's number of pages potentially available for dirty
2708c2ecf20Sopenharmony_ci * page cache.  This is the base value for the per-node dirty limits.
2718c2ecf20Sopenharmony_ci */
2728c2ecf20Sopenharmony_cistatic unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
2738c2ecf20Sopenharmony_ci{
2748c2ecf20Sopenharmony_ci	unsigned long nr_pages = 0;
2758c2ecf20Sopenharmony_ci	int z;
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	for (z = 0; z < MAX_NR_ZONES; z++) {
2788c2ecf20Sopenharmony_ci		struct zone *zone = pgdat->node_zones + z;
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci		if (!populated_zone(zone))
2818c2ecf20Sopenharmony_ci			continue;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
2848c2ecf20Sopenharmony_ci	}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	/*
2878c2ecf20Sopenharmony_ci	 * Pages reserved for the kernel should not be considered
2888c2ecf20Sopenharmony_ci	 * dirtyable, to prevent a situation where reclaim has to
2898c2ecf20Sopenharmony_ci	 * clean pages in order to balance the zones.
2908c2ecf20Sopenharmony_ci	 */
2918c2ecf20Sopenharmony_ci	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
2948c2ecf20Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	return nr_pages;
2978c2ecf20Sopenharmony_ci}
2988c2ecf20Sopenharmony_ci
/*
 * highmem_dirtyable_memory - dirtyable pages residing in highmem
 * @total: total dirtyable pages, used to clamp the result
 *
 * Returns 0 when CONFIG_HIGHMEM is not set.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watch for underflows */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  However we still want to add in nodes
	 * which are below threshold (negative values) to get a more
	 * accurate calculation but make sure that the total never
	 * underflows.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci/**
3528c2ecf20Sopenharmony_ci * global_dirtyable_memory - number of globally dirtyable pages
3538c2ecf20Sopenharmony_ci *
3548c2ecf20Sopenharmony_ci * Return: the global number of pages potentially available for dirty
3558c2ecf20Sopenharmony_ci * page cache.  This is the base value for the global dirty limits.
3568c2ecf20Sopenharmony_ci */
3578c2ecf20Sopenharmony_cistatic unsigned long global_dirtyable_memory(void)
3588c2ecf20Sopenharmony_ci{
3598c2ecf20Sopenharmony_ci	unsigned long x;
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	x = global_zone_page_state(NR_FREE_PAGES);
3628c2ecf20Sopenharmony_ci	/*
3638c2ecf20Sopenharmony_ci	 * Pages reserved for the kernel should not be considered
3648c2ecf20Sopenharmony_ci	 * dirtyable, to prevent a situation where reclaim has to
3658c2ecf20Sopenharmony_ci	 * clean pages in order to balance the zones.
3668c2ecf20Sopenharmony_ci	 */
3678c2ecf20Sopenharmony_ci	x -= min(x, totalreserve_pages);
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	x += global_node_page_state(NR_INACTIVE_FILE);
3708c2ecf20Sopenharmony_ci	x += global_node_page_state(NR_ACTIVE_FILE);
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	if (!vm_highmem_is_dirtyable)
3738c2ecf20Sopenharmony_ci		x -= highmem_dirtyable_memory(x);
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	return x + 1;	/* Ensure that we never return 0 */
3768c2ecf20Sopenharmony_ci}
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci/**
3798c2ecf20Sopenharmony_ci * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
3808c2ecf20Sopenharmony_ci * @dtc: dirty_throttle_control of interest
3818c2ecf20Sopenharmony_ci *
3828c2ecf20Sopenharmony_ci * Calculate @dtc->thresh and ->bg_thresh considering
3838c2ecf20Sopenharmony_ci * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
3848c2ecf20Sopenharmony_ci * must ensure that @dtc->avail is set before calling this function.  The
3858c2ecf20Sopenharmony_ci * dirty limits will be lifted by 1/4 for real-time tasks.
3868c2ecf20Sopenharmony_ci */
3878c2ecf20Sopenharmony_cistatic void domain_dirty_limits(struct dirty_throttle_control *dtc)
3888c2ecf20Sopenharmony_ci{
3898c2ecf20Sopenharmony_ci	const unsigned long available_memory = dtc->avail;
3908c2ecf20Sopenharmony_ci	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
3918c2ecf20Sopenharmony_ci	unsigned long bytes = vm_dirty_bytes;
3928c2ecf20Sopenharmony_ci	unsigned long bg_bytes = dirty_background_bytes;
3938c2ecf20Sopenharmony_ci	/* convert ratios to per-PAGE_SIZE for higher precision */
3948c2ecf20Sopenharmony_ci	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
3958c2ecf20Sopenharmony_ci	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
3968c2ecf20Sopenharmony_ci	unsigned long thresh;
3978c2ecf20Sopenharmony_ci	unsigned long bg_thresh;
3988c2ecf20Sopenharmony_ci	struct task_struct *tsk;
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	/* gdtc is !NULL iff @dtc is for memcg domain */
4018c2ecf20Sopenharmony_ci	if (gdtc) {
4028c2ecf20Sopenharmony_ci		unsigned long global_avail = gdtc->avail;
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci		/*
4058c2ecf20Sopenharmony_ci		 * The byte settings can't be applied directly to memcg
4068c2ecf20Sopenharmony_ci		 * domains.  Convert them to ratios by scaling against
4078c2ecf20Sopenharmony_ci		 * globally available memory.  As the ratios are in
4088c2ecf20Sopenharmony_ci		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
4098c2ecf20Sopenharmony_ci		 * number of pages.
4108c2ecf20Sopenharmony_ci		 */
4118c2ecf20Sopenharmony_ci		if (bytes)
4128c2ecf20Sopenharmony_ci			ratio = min(DIV_ROUND_UP(bytes, global_avail),
4138c2ecf20Sopenharmony_ci				    PAGE_SIZE);
4148c2ecf20Sopenharmony_ci		if (bg_bytes)
4158c2ecf20Sopenharmony_ci			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
4168c2ecf20Sopenharmony_ci				       PAGE_SIZE);
4178c2ecf20Sopenharmony_ci		bytes = bg_bytes = 0;
4188c2ecf20Sopenharmony_ci	}
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci	if (bytes)
4218c2ecf20Sopenharmony_ci		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
4228c2ecf20Sopenharmony_ci	else
4238c2ecf20Sopenharmony_ci		thresh = (ratio * available_memory) / PAGE_SIZE;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	if (bg_bytes)
4268c2ecf20Sopenharmony_ci		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
4278c2ecf20Sopenharmony_ci	else
4288c2ecf20Sopenharmony_ci		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	tsk = current;
4318c2ecf20Sopenharmony_ci	if (rt_task(tsk)) {
4328c2ecf20Sopenharmony_ci		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
4338c2ecf20Sopenharmony_ci		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
4348c2ecf20Sopenharmony_ci	}
4358c2ecf20Sopenharmony_ci	/*
4368c2ecf20Sopenharmony_ci	 * Dirty throttling logic assumes the limits in page units fit into
4378c2ecf20Sopenharmony_ci	 * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
4388c2ecf20Sopenharmony_ci	 */
4398c2ecf20Sopenharmony_ci	if (thresh > UINT_MAX)
4408c2ecf20Sopenharmony_ci		thresh = UINT_MAX;
4418c2ecf20Sopenharmony_ci	/* This makes sure bg_thresh is within 32-bits as well */
4428c2ecf20Sopenharmony_ci	if (bg_thresh >= thresh)
4438c2ecf20Sopenharmony_ci		bg_thresh = thresh / 2;
4448c2ecf20Sopenharmony_ci	dtc->thresh = thresh;
4458c2ecf20Sopenharmony_ci	dtc->bg_thresh = bg_thresh;
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	/* we should eventually report the domain in the TP */
4488c2ecf20Sopenharmony_ci	if (!gdtc)
4498c2ecf20Sopenharmony_ci		trace_global_dirty_state(bg_thresh, thresh);
4508c2ecf20Sopenharmony_ci}
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci/**
4538c2ecf20Sopenharmony_ci * global_dirty_limits - background-writeback and dirty-throttling thresholds
4548c2ecf20Sopenharmony_ci * @pbackground: out parameter for bg_thresh
4558c2ecf20Sopenharmony_ci * @pdirty: out parameter for thresh
4568c2ecf20Sopenharmony_ci *
4578c2ecf20Sopenharmony_ci * Calculate bg_thresh and thresh for global_wb_domain.  See
4588c2ecf20Sopenharmony_ci * domain_dirty_limits() for details.
4598c2ecf20Sopenharmony_ci */
4608c2ecf20Sopenharmony_civoid global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
4618c2ecf20Sopenharmony_ci{
4628c2ecf20Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci	gdtc.avail = global_dirtyable_memory();
4658c2ecf20Sopenharmony_ci	domain_dirty_limits(&gdtc);
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	*pbackground = gdtc.bg_thresh;
4688c2ecf20Sopenharmony_ci	*pdirty = gdtc.thresh;
4698c2ecf20Sopenharmony_ci}
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci/**
4728c2ecf20Sopenharmony_ci * node_dirty_limit - maximum number of dirty pages allowed in a node
4738c2ecf20Sopenharmony_ci * @pgdat: the node
4748c2ecf20Sopenharmony_ci *
4758c2ecf20Sopenharmony_ci * Return: the maximum number of dirty pages allowed in a node, based
4768c2ecf20Sopenharmony_ci * on the node's dirtyable memory.
4778c2ecf20Sopenharmony_ci */
4788c2ecf20Sopenharmony_cistatic unsigned long node_dirty_limit(struct pglist_data *pgdat)
4798c2ecf20Sopenharmony_ci{
4808c2ecf20Sopenharmony_ci	unsigned long node_memory = node_dirtyable_memory(pgdat);
4818c2ecf20Sopenharmony_ci	struct task_struct *tsk = current;
4828c2ecf20Sopenharmony_ci	unsigned long dirty;
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	if (vm_dirty_bytes)
4858c2ecf20Sopenharmony_ci		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
4868c2ecf20Sopenharmony_ci			node_memory / global_dirtyable_memory();
4878c2ecf20Sopenharmony_ci	else
4888c2ecf20Sopenharmony_ci		dirty = vm_dirty_ratio * node_memory / 100;
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	if (rt_task(tsk))
4918c2ecf20Sopenharmony_ci		dirty += dirty / 4;
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	/*
4948c2ecf20Sopenharmony_ci	 * Dirty throttling logic assumes the limits in page units fit into
4958c2ecf20Sopenharmony_ci	 * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
4968c2ecf20Sopenharmony_ci	 */
4978c2ecf20Sopenharmony_ci	return min_t(unsigned long, dirty, UINT_MAX);
4988c2ecf20Sopenharmony_ci}
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci/**
5018c2ecf20Sopenharmony_ci * node_dirty_ok - tells whether a node is within its dirty limits
5028c2ecf20Sopenharmony_ci * @pgdat: the node to check
5038c2ecf20Sopenharmony_ci *
5048c2ecf20Sopenharmony_ci * Return: %true when the dirty pages in @pgdat are within the node's
5058c2ecf20Sopenharmony_ci * dirty limit, %false if the limit is exceeded.
5068c2ecf20Sopenharmony_ci */
5078c2ecf20Sopenharmony_cibool node_dirty_ok(struct pglist_data *pgdat)
5088c2ecf20Sopenharmony_ci{
5098c2ecf20Sopenharmony_ci	unsigned long limit = node_dirty_limit(pgdat);
5108c2ecf20Sopenharmony_ci	unsigned long nr_pages = 0;
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
5138c2ecf20Sopenharmony_ci	nr_pages += node_page_state(pgdat, NR_WRITEBACK);
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci	return nr_pages <= limit;
5168c2ecf20Sopenharmony_ci}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ciint dirty_background_ratio_handler(struct ctl_table *table, int write,
5198c2ecf20Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
5208c2ecf20Sopenharmony_ci{
5218c2ecf20Sopenharmony_ci	int ret;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
5248c2ecf20Sopenharmony_ci	if (ret == 0 && write)
5258c2ecf20Sopenharmony_ci		dirty_background_bytes = 0;
5268c2ecf20Sopenharmony_ci	return ret;
5278c2ecf20Sopenharmony_ci}
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ciint dirty_background_bytes_handler(struct ctl_table *table, int write,
5308c2ecf20Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	int ret;
5338c2ecf20Sopenharmony_ci	unsigned long old_bytes = dirty_background_bytes;
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
5368c2ecf20Sopenharmony_ci	if (ret == 0 && write) {
5378c2ecf20Sopenharmony_ci		if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) >
5388c2ecf20Sopenharmony_ci								UINT_MAX) {
5398c2ecf20Sopenharmony_ci			dirty_background_bytes = old_bytes;
5408c2ecf20Sopenharmony_ci			return -ERANGE;
5418c2ecf20Sopenharmony_ci		}
5428c2ecf20Sopenharmony_ci		dirty_background_ratio = 0;
5438c2ecf20Sopenharmony_ci	}
5448c2ecf20Sopenharmony_ci	return ret;
5458c2ecf20Sopenharmony_ci}
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ciint dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
5488c2ecf20Sopenharmony_ci		size_t *lenp, loff_t *ppos)
5498c2ecf20Sopenharmony_ci{
5508c2ecf20Sopenharmony_ci	int old_ratio = vm_dirty_ratio;
5518c2ecf20Sopenharmony_ci	int ret;
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
5548c2ecf20Sopenharmony_ci	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
5558c2ecf20Sopenharmony_ci		writeback_set_ratelimit();
5568c2ecf20Sopenharmony_ci		vm_dirty_bytes = 0;
5578c2ecf20Sopenharmony_ci	}
5588c2ecf20Sopenharmony_ci	return ret;
5598c2ecf20Sopenharmony_ci}
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ciint dirty_bytes_handler(struct ctl_table *table, int write,
5628c2ecf20Sopenharmony_ci		void *buffer, size_t *lenp, loff_t *ppos)
5638c2ecf20Sopenharmony_ci{
5648c2ecf20Sopenharmony_ci	unsigned long old_bytes = vm_dirty_bytes;
5658c2ecf20Sopenharmony_ci	int ret;
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
5688c2ecf20Sopenharmony_ci	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
5698c2ecf20Sopenharmony_ci		if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) {
5708c2ecf20Sopenharmony_ci			vm_dirty_bytes = old_bytes;
5718c2ecf20Sopenharmony_ci			return -ERANGE;
5728c2ecf20Sopenharmony_ci		}
5738c2ecf20Sopenharmony_ci		writeback_set_ratelimit();
5748c2ecf20Sopenharmony_ci		vm_dirty_ratio = 0;
5758c2ecf20Sopenharmony_ci	}
5768c2ecf20Sopenharmony_ci	return ret;
5778c2ecf20Sopenharmony_ci}
5788c2ecf20Sopenharmony_ci
5798c2ecf20Sopenharmony_cistatic unsigned long wp_next_time(unsigned long cur_time)
5808c2ecf20Sopenharmony_ci{
5818c2ecf20Sopenharmony_ci	cur_time += VM_COMPLETIONS_PERIOD_LEN;
5828c2ecf20Sopenharmony_ci	/* 0 has a special meaning... */
5838c2ecf20Sopenharmony_ci	if (!cur_time)
5848c2ecf20Sopenharmony_ci		return 1;
5858c2ecf20Sopenharmony_ci	return cur_time;
5868c2ecf20Sopenharmony_ci}
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_cistatic void wb_domain_writeout_inc(struct wb_domain *dom,
5898c2ecf20Sopenharmony_ci				   struct fprop_local_percpu *completions,
5908c2ecf20Sopenharmony_ci				   unsigned int max_prop_frac)
5918c2ecf20Sopenharmony_ci{
5928c2ecf20Sopenharmony_ci	__fprop_inc_percpu_max(&dom->completions, completions,
5938c2ecf20Sopenharmony_ci			       max_prop_frac);
5948c2ecf20Sopenharmony_ci	/* First event after period switching was turned off? */
5958c2ecf20Sopenharmony_ci	if (unlikely(!dom->period_time)) {
5968c2ecf20Sopenharmony_ci		/*
5978c2ecf20Sopenharmony_ci		 * We can race with other __bdi_writeout_inc calls here but
5988c2ecf20Sopenharmony_ci		 * it does not cause any harm since the resulting time when
5998c2ecf20Sopenharmony_ci		 * timer will fire and what is in writeout_period_time will be
6008c2ecf20Sopenharmony_ci		 * roughly the same.
6018c2ecf20Sopenharmony_ci		 */
6028c2ecf20Sopenharmony_ci		dom->period_time = wp_next_time(jiffies);
6038c2ecf20Sopenharmony_ci		mod_timer(&dom->period_timer, dom->period_time);
6048c2ecf20Sopenharmony_ci	}
6058c2ecf20Sopenharmony_ci}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci/*
6088c2ecf20Sopenharmony_ci * Increment @wb's writeout completion count and the global writeout
6098c2ecf20Sopenharmony_ci * completion count. Called from test_clear_page_writeback().
6108c2ecf20Sopenharmony_ci */
6118c2ecf20Sopenharmony_cistatic inline void __wb_writeout_inc(struct bdi_writeback *wb)
6128c2ecf20Sopenharmony_ci{
6138c2ecf20Sopenharmony_ci	struct wb_domain *cgdom;
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	inc_wb_stat(wb, WB_WRITTEN);
6168c2ecf20Sopenharmony_ci	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
6178c2ecf20Sopenharmony_ci			       wb->bdi->max_prop_frac);
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	cgdom = mem_cgroup_wb_domain(wb);
6208c2ecf20Sopenharmony_ci	if (cgdom)
6218c2ecf20Sopenharmony_ci		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
6228c2ecf20Sopenharmony_ci				       wb->bdi->max_prop_frac);
6238c2ecf20Sopenharmony_ci}
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_civoid wb_writeout_inc(struct bdi_writeback *wb)
6268c2ecf20Sopenharmony_ci{
6278c2ecf20Sopenharmony_ci	unsigned long flags;
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	local_irq_save(flags);
6308c2ecf20Sopenharmony_ci	__wb_writeout_inc(wb);
6318c2ecf20Sopenharmony_ci	local_irq_restore(flags);
6328c2ecf20Sopenharmony_ci}
6338c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(wb_writeout_inc);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci/*
6368c2ecf20Sopenharmony_ci * On idle system, we can be called long after we scheduled because we use
6378c2ecf20Sopenharmony_ci * deferred timers so count with missed periods.
6388c2ecf20Sopenharmony_ci */
6398c2ecf20Sopenharmony_cistatic void writeout_period(struct timer_list *t)
6408c2ecf20Sopenharmony_ci{
6418c2ecf20Sopenharmony_ci	struct wb_domain *dom = from_timer(dom, t, period_timer);
6428c2ecf20Sopenharmony_ci	int miss_periods = (jiffies - dom->period_time) /
6438c2ecf20Sopenharmony_ci						 VM_COMPLETIONS_PERIOD_LEN;
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_ci	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
6468c2ecf20Sopenharmony_ci		dom->period_time = wp_next_time(dom->period_time +
6478c2ecf20Sopenharmony_ci				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
6488c2ecf20Sopenharmony_ci		mod_timer(&dom->period_timer, dom->period_time);
6498c2ecf20Sopenharmony_ci	} else {
6508c2ecf20Sopenharmony_ci		/*
6518c2ecf20Sopenharmony_ci		 * Aging has zeroed all fractions. Stop wasting CPU on period
6528c2ecf20Sopenharmony_ci		 * updates.
6538c2ecf20Sopenharmony_ci		 */
6548c2ecf20Sopenharmony_ci		dom->period_time = 0;
6558c2ecf20Sopenharmony_ci	}
6568c2ecf20Sopenharmony_ci}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ciint wb_domain_init(struct wb_domain *dom, gfp_t gfp)
6598c2ecf20Sopenharmony_ci{
6608c2ecf20Sopenharmony_ci	memset(dom, 0, sizeof(*dom));
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	spin_lock_init(&dom->lock);
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	dom->dirty_limit_tstamp = jiffies;
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci	return fprop_global_init(&dom->completions, gfp);
6698c2ecf20Sopenharmony_ci}
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK
6728c2ecf20Sopenharmony_civoid wb_domain_exit(struct wb_domain *dom)
6738c2ecf20Sopenharmony_ci{
6748c2ecf20Sopenharmony_ci	del_timer_sync(&dom->period_timer);
6758c2ecf20Sopenharmony_ci	fprop_global_destroy(&dom->completions);
6768c2ecf20Sopenharmony_ci}
6778c2ecf20Sopenharmony_ci#endif
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci/*
6808c2ecf20Sopenharmony_ci * bdi_min_ratio keeps the sum of the minimum dirty shares of all
6818c2ecf20Sopenharmony_ci * registered backing devices, which, for obvious reasons, can not
6828c2ecf20Sopenharmony_ci * exceed 100%.
6838c2ecf20Sopenharmony_ci */
6848c2ecf20Sopenharmony_cistatic unsigned int bdi_min_ratio;
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ciint bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
6878c2ecf20Sopenharmony_ci{
6888c2ecf20Sopenharmony_ci	int ret = 0;
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	spin_lock_bh(&bdi_lock);
6918c2ecf20Sopenharmony_ci	if (min_ratio > bdi->max_ratio) {
6928c2ecf20Sopenharmony_ci		ret = -EINVAL;
6938c2ecf20Sopenharmony_ci	} else {
6948c2ecf20Sopenharmony_ci		min_ratio -= bdi->min_ratio;
6958c2ecf20Sopenharmony_ci		if (bdi_min_ratio + min_ratio < 100) {
6968c2ecf20Sopenharmony_ci			bdi_min_ratio += min_ratio;
6978c2ecf20Sopenharmony_ci			bdi->min_ratio += min_ratio;
6988c2ecf20Sopenharmony_ci		} else {
6998c2ecf20Sopenharmony_ci			ret = -EINVAL;
7008c2ecf20Sopenharmony_ci		}
7018c2ecf20Sopenharmony_ci	}
7028c2ecf20Sopenharmony_ci	spin_unlock_bh(&bdi_lock);
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	return ret;
7058c2ecf20Sopenharmony_ci}
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ciint bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
7088c2ecf20Sopenharmony_ci{
7098c2ecf20Sopenharmony_ci	int ret = 0;
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	if (max_ratio > 100)
7128c2ecf20Sopenharmony_ci		return -EINVAL;
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	spin_lock_bh(&bdi_lock);
7158c2ecf20Sopenharmony_ci	if (bdi->min_ratio > max_ratio) {
7168c2ecf20Sopenharmony_ci		ret = -EINVAL;
7178c2ecf20Sopenharmony_ci	} else {
7188c2ecf20Sopenharmony_ci		bdi->max_ratio = max_ratio;
7198c2ecf20Sopenharmony_ci		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
7208c2ecf20Sopenharmony_ci	}
7218c2ecf20Sopenharmony_ci	spin_unlock_bh(&bdi_lock);
7228c2ecf20Sopenharmony_ci
7238c2ecf20Sopenharmony_ci	return ret;
7248c2ecf20Sopenharmony_ci}
7258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(bdi_set_max_ratio);
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_cistatic unsigned long dirty_freerun_ceiling(unsigned long thresh,
7288c2ecf20Sopenharmony_ci					   unsigned long bg_thresh)
7298c2ecf20Sopenharmony_ci{
7308c2ecf20Sopenharmony_ci	return (thresh + bg_thresh) / 2;
7318c2ecf20Sopenharmony_ci}
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_cistatic unsigned long hard_dirty_limit(struct wb_domain *dom,
7348c2ecf20Sopenharmony_ci				      unsigned long thresh)
7358c2ecf20Sopenharmony_ci{
7368c2ecf20Sopenharmony_ci	return max(thresh, dom->dirty_limit);
7378c2ecf20Sopenharmony_ci}
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci/*
7408c2ecf20Sopenharmony_ci * Memory which can be further allocated to a memcg domain is capped by
7418c2ecf20Sopenharmony_ci * system-wide clean memory excluding the amount being used in the domain.
7428c2ecf20Sopenharmony_ci */
7438c2ecf20Sopenharmony_cistatic void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
7448c2ecf20Sopenharmony_ci			    unsigned long filepages, unsigned long headroom)
7458c2ecf20Sopenharmony_ci{
7468c2ecf20Sopenharmony_ci	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
7478c2ecf20Sopenharmony_ci	unsigned long clean = filepages - min(filepages, mdtc->dirty);
7488c2ecf20Sopenharmony_ci	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
7498c2ecf20Sopenharmony_ci	unsigned long other_clean = global_clean - min(global_clean, clean);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci	mdtc->avail = filepages + min(headroom, other_clean);
7528c2ecf20Sopenharmony_ci}
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci/**
7558c2ecf20Sopenharmony_ci * __wb_calc_thresh - @wb's share of dirty throttling threshold
7568c2ecf20Sopenharmony_ci * @dtc: dirty_throttle_context of interest
7578c2ecf20Sopenharmony_ci *
7588c2ecf20Sopenharmony_ci * Note that balance_dirty_pages() will only seriously take it as a hard limit
7598c2ecf20Sopenharmony_ci * when sleeping max_pause per page is not enough to keep the dirty pages under
7608c2ecf20Sopenharmony_ci * control. For example, when the device is completely stalled due to some error
7618c2ecf20Sopenharmony_ci * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
7628c2ecf20Sopenharmony_ci * In the other normal situations, it acts more gently by throttling the tasks
7638c2ecf20Sopenharmony_ci * more (rather than completely block them) when the wb dirty pages go high.
7648c2ecf20Sopenharmony_ci *
7658c2ecf20Sopenharmony_ci * It allocates high/low dirty limits to fast/slow devices, in order to prevent
7668c2ecf20Sopenharmony_ci * - starving fast devices
7678c2ecf20Sopenharmony_ci * - piling up dirty pages (that will take long time to sync) on slow devices
7688c2ecf20Sopenharmony_ci *
7698c2ecf20Sopenharmony_ci * The wb's share of dirty limit will be adapting to its throughput and
7708c2ecf20Sopenharmony_ci * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
7718c2ecf20Sopenharmony_ci *
7728c2ecf20Sopenharmony_ci * Return: @wb's dirty limit in pages. The term "dirty" in the context of
7738c2ecf20Sopenharmony_ci * dirty balancing includes all PG_dirty and PG_writeback pages.
7748c2ecf20Sopenharmony_ci */
7758c2ecf20Sopenharmony_cistatic unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
7768c2ecf20Sopenharmony_ci{
7778c2ecf20Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
7788c2ecf20Sopenharmony_ci	unsigned long thresh = dtc->thresh;
7798c2ecf20Sopenharmony_ci	u64 wb_thresh;
7808c2ecf20Sopenharmony_ci	unsigned long numerator, denominator;
7818c2ecf20Sopenharmony_ci	unsigned long wb_min_ratio, wb_max_ratio;
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	/*
7848c2ecf20Sopenharmony_ci	 * Calculate this BDI's share of the thresh ratio.
7858c2ecf20Sopenharmony_ci	 */
7868c2ecf20Sopenharmony_ci	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
7878c2ecf20Sopenharmony_ci			      &numerator, &denominator);
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
7908c2ecf20Sopenharmony_ci	wb_thresh *= numerator;
7918c2ecf20Sopenharmony_ci	wb_thresh = div64_ul(wb_thresh, denominator);
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	wb_thresh += (thresh * wb_min_ratio) / 100;
7968c2ecf20Sopenharmony_ci	if (wb_thresh > (thresh * wb_max_ratio) / 100)
7978c2ecf20Sopenharmony_ci		wb_thresh = thresh * wb_max_ratio / 100;
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	return wb_thresh;
8008c2ecf20Sopenharmony_ci}
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ciunsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
8038c2ecf20Sopenharmony_ci{
8048c2ecf20Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
8058c2ecf20Sopenharmony_ci					       .thresh = thresh };
8068c2ecf20Sopenharmony_ci	return __wb_calc_thresh(&gdtc);
8078c2ecf20Sopenharmony_ci}
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci/*
8108c2ecf20Sopenharmony_ci *                           setpoint - dirty 3
8118c2ecf20Sopenharmony_ci *        f(dirty) := 1.0 + (----------------)
8128c2ecf20Sopenharmony_ci *                           limit - setpoint
8138c2ecf20Sopenharmony_ci *
8148c2ecf20Sopenharmony_ci * it's a 3rd order polynomial that subjects to
8158c2ecf20Sopenharmony_ci *
8168c2ecf20Sopenharmony_ci * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
8178c2ecf20Sopenharmony_ci * (2) f(setpoint) = 1.0 => the balance point
8188c2ecf20Sopenharmony_ci * (3) f(limit)    = 0   => the hard limit
8198c2ecf20Sopenharmony_ci * (4) df/dx      <= 0	 => negative feedback control
8208c2ecf20Sopenharmony_ci * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
8218c2ecf20Sopenharmony_ci *     => fast response on large errors; small oscillation near setpoint
8228c2ecf20Sopenharmony_ci */
8238c2ecf20Sopenharmony_cistatic long long pos_ratio_polynom(unsigned long setpoint,
8248c2ecf20Sopenharmony_ci					  unsigned long dirty,
8258c2ecf20Sopenharmony_ci					  unsigned long limit)
8268c2ecf20Sopenharmony_ci{
8278c2ecf20Sopenharmony_ci	long long pos_ratio;
8288c2ecf20Sopenharmony_ci	long x;
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
8318c2ecf20Sopenharmony_ci		      (limit - setpoint) | 1);
8328c2ecf20Sopenharmony_ci	pos_ratio = x;
8338c2ecf20Sopenharmony_ci	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
8348c2ecf20Sopenharmony_ci	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
8358c2ecf20Sopenharmony_ci	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
8388c2ecf20Sopenharmony_ci}
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci/*
8418c2ecf20Sopenharmony_ci * Dirty position control.
8428c2ecf20Sopenharmony_ci *
8438c2ecf20Sopenharmony_ci * (o) global/bdi setpoints
8448c2ecf20Sopenharmony_ci *
8458c2ecf20Sopenharmony_ci * We want the dirty pages be balanced around the global/wb setpoints.
8468c2ecf20Sopenharmony_ci * When the number of dirty pages is higher/lower than the setpoint, the
8478c2ecf20Sopenharmony_ci * dirty position control ratio (and hence task dirty ratelimit) will be
8488c2ecf20Sopenharmony_ci * decreased/increased to bring the dirty pages back to the setpoint.
8498c2ecf20Sopenharmony_ci *
8508c2ecf20Sopenharmony_ci *     pos_ratio = 1 << RATELIMIT_CALC_SHIFT
8518c2ecf20Sopenharmony_ci *
8528c2ecf20Sopenharmony_ci *     if (dirty < setpoint) scale up   pos_ratio
8538c2ecf20Sopenharmony_ci *     if (dirty > setpoint) scale down pos_ratio
8548c2ecf20Sopenharmony_ci *
8558c2ecf20Sopenharmony_ci *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
8568c2ecf20Sopenharmony_ci *     if (wb_dirty > wb_setpoint) scale down pos_ratio
8578c2ecf20Sopenharmony_ci *
8588c2ecf20Sopenharmony_ci *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
8598c2ecf20Sopenharmony_ci *
8608c2ecf20Sopenharmony_ci * (o) global control line
8618c2ecf20Sopenharmony_ci *
8628c2ecf20Sopenharmony_ci *     ^ pos_ratio
8638c2ecf20Sopenharmony_ci *     |
8648c2ecf20Sopenharmony_ci *     |            |<===== global dirty control scope ======>|
8658c2ecf20Sopenharmony_ci * 2.0 .............*
8668c2ecf20Sopenharmony_ci *     |            .*
8678c2ecf20Sopenharmony_ci *     |            . *
8688c2ecf20Sopenharmony_ci *     |            .   *
8698c2ecf20Sopenharmony_ci *     |            .     *
8708c2ecf20Sopenharmony_ci *     |            .        *
8718c2ecf20Sopenharmony_ci *     |            .            *
8728c2ecf20Sopenharmony_ci * 1.0 ................................*
8738c2ecf20Sopenharmony_ci *     |            .                  .     *
8748c2ecf20Sopenharmony_ci *     |            .                  .          *
8758c2ecf20Sopenharmony_ci *     |            .                  .              *
8768c2ecf20Sopenharmony_ci *     |            .                  .                 *
8778c2ecf20Sopenharmony_ci *     |            .                  .                    *
8788c2ecf20Sopenharmony_ci *   0 +------------.------------------.----------------------*------------->
8798c2ecf20Sopenharmony_ci *           freerun^          setpoint^                 limit^   dirty pages
8808c2ecf20Sopenharmony_ci *
8818c2ecf20Sopenharmony_ci * (o) wb control line
8828c2ecf20Sopenharmony_ci *
8838c2ecf20Sopenharmony_ci *     ^ pos_ratio
8848c2ecf20Sopenharmony_ci *     |
8858c2ecf20Sopenharmony_ci *     |            *
8868c2ecf20Sopenharmony_ci *     |              *
8878c2ecf20Sopenharmony_ci *     |                *
8888c2ecf20Sopenharmony_ci *     |                  *
8898c2ecf20Sopenharmony_ci *     |                    * |<=========== span ============>|
8908c2ecf20Sopenharmony_ci * 1.0 .......................*
8918c2ecf20Sopenharmony_ci *     |                      . *
8928c2ecf20Sopenharmony_ci *     |                      .   *
8938c2ecf20Sopenharmony_ci *     |                      .     *
8948c2ecf20Sopenharmony_ci *     |                      .       *
8958c2ecf20Sopenharmony_ci *     |                      .         *
8968c2ecf20Sopenharmony_ci *     |                      .           *
8978c2ecf20Sopenharmony_ci *     |                      .             *
8988c2ecf20Sopenharmony_ci *     |                      .               *
8998c2ecf20Sopenharmony_ci *     |                      .                 *
9008c2ecf20Sopenharmony_ci *     |                      .                   *
9018c2ecf20Sopenharmony_ci *     |                      .                     *
9028c2ecf20Sopenharmony_ci * 1/4 ...............................................* * * * * * * * * * * *
9038c2ecf20Sopenharmony_ci *     |                      .                         .
9048c2ecf20Sopenharmony_ci *     |                      .                           .
9058c2ecf20Sopenharmony_ci *     |                      .                             .
9068c2ecf20Sopenharmony_ci *   0 +----------------------.-------------------------------.------------->
9078c2ecf20Sopenharmony_ci *                wb_setpoint^                    x_intercept^
9088c2ecf20Sopenharmony_ci *
9098c2ecf20Sopenharmony_ci * The wb control line won't drop below pos_ratio=1/4, so that wb_dirty can
9108c2ecf20Sopenharmony_ci * be smoothly throttled down to normal if it starts high in situations like
9118c2ecf20Sopenharmony_ci * - start writing to a slow SD card and a fast disk at the same time. The SD
9128c2ecf20Sopenharmony_ci *   card's wb_dirty may rush to many times higher than wb_setpoint.
9138c2ecf20Sopenharmony_ci * - the wb dirty thresh drops quickly due to change of JBOD workload
9148c2ecf20Sopenharmony_ci */
9158c2ecf20Sopenharmony_cistatic void wb_position_ratio(struct dirty_throttle_control *dtc)
9168c2ecf20Sopenharmony_ci{
9178c2ecf20Sopenharmony_ci	struct bdi_writeback *wb = dtc->wb;
9188c2ecf20Sopenharmony_ci	unsigned long write_bw = wb->avg_write_bandwidth;
9198c2ecf20Sopenharmony_ci	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
9208c2ecf20Sopenharmony_ci	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
9218c2ecf20Sopenharmony_ci	unsigned long wb_thresh = dtc->wb_thresh;
9228c2ecf20Sopenharmony_ci	unsigned long x_intercept;
9238c2ecf20Sopenharmony_ci	unsigned long setpoint;		/* dirty pages' target balance point */
9248c2ecf20Sopenharmony_ci	unsigned long wb_setpoint;
9258c2ecf20Sopenharmony_ci	unsigned long span;
9268c2ecf20Sopenharmony_ci	long long pos_ratio;		/* for scaling up/down the rate limit */
9278c2ecf20Sopenharmony_ci	long x;
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	dtc->pos_ratio = 0;
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	if (unlikely(dtc->dirty >= limit))
9328c2ecf20Sopenharmony_ci		return;
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	/*
9358c2ecf20Sopenharmony_ci	 * global setpoint
9368c2ecf20Sopenharmony_ci	 *
9378c2ecf20Sopenharmony_ci	 * See comment for pos_ratio_polynom().
9388c2ecf20Sopenharmony_ci	 */
9398c2ecf20Sopenharmony_ci	setpoint = (freerun + limit) / 2;
9408c2ecf20Sopenharmony_ci	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci	/*
9438c2ecf20Sopenharmony_ci	 * The strictlimit feature is a tool preventing mistrusted filesystems
9448c2ecf20Sopenharmony_ci	 * from growing a large number of dirty pages before throttling. For
9458c2ecf20Sopenharmony_ci	 * such filesystems balance_dirty_pages always checks wb counters
9468c2ecf20Sopenharmony_ci	 * against wb limits. Even if global "nr_dirty" is under "freerun".
9478c2ecf20Sopenharmony_ci	 * This is especially important for fuse which sets bdi->max_ratio to
9488c2ecf20Sopenharmony_ci	 * 1% by default. Without strictlimit feature, fuse writeback may
9498c2ecf20Sopenharmony_ci	 * consume arbitrary amount of RAM because it is accounted in
9508c2ecf20Sopenharmony_ci	 * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
9518c2ecf20Sopenharmony_ci	 *
9528c2ecf20Sopenharmony_ci	 * Here, in wb_position_ratio(), we calculate pos_ratio based on
9538c2ecf20Sopenharmony_ci	 * two values: wb_dirty and wb_thresh. Let's consider an example:
9548c2ecf20Sopenharmony_ci	 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
9558c2ecf20Sopenharmony_ci	 * limits are set by default to 10% and 20% (background and throttle).
9568c2ecf20Sopenharmony_ci	 * Then wb_thresh is 1% of 20% of 16GB. This amounts to ~8K pages.
9578c2ecf20Sopenharmony_ci	 * wb_calc_thresh(wb, bg_thresh) is about ~4K pages. wb_setpoint is
9588c2ecf20Sopenharmony_ci	 * about ~6K pages (as the average of background and throttle wb
9598c2ecf20Sopenharmony_ci	 * limits). The 3rd order polynomial will provide positive feedback if
9608c2ecf20Sopenharmony_ci	 * wb_dirty is under wb_setpoint and vice versa.
9618c2ecf20Sopenharmony_ci	 *
9628c2ecf20Sopenharmony_ci	 * Note, that we cannot use global counters in these calculations
9638c2ecf20Sopenharmony_ci	 * because we want to throttle process writing to a strictlimit wb
9648c2ecf20Sopenharmony_ci	 * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB
9658c2ecf20Sopenharmony_ci	 * in the example above).
9668c2ecf20Sopenharmony_ci	 */
9678c2ecf20Sopenharmony_ci	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
9688c2ecf20Sopenharmony_ci		long long wb_pos_ratio;
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci		if (dtc->wb_dirty < 8) {
9718c2ecf20Sopenharmony_ci			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
9728c2ecf20Sopenharmony_ci					   2 << RATELIMIT_CALC_SHIFT);
9738c2ecf20Sopenharmony_ci			return;
9748c2ecf20Sopenharmony_ci		}
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci		if (dtc->wb_dirty >= wb_thresh)
9778c2ecf20Sopenharmony_ci			return;
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_ci		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
9808c2ecf20Sopenharmony_ci						    dtc->wb_bg_thresh);
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ci		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
9838c2ecf20Sopenharmony_ci			return;
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_ci		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
9868c2ecf20Sopenharmony_ci						 wb_thresh);
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_ci		/*
9898c2ecf20Sopenharmony_ci		 * Typically, for strictlimit case, wb_setpoint << setpoint
9908c2ecf20Sopenharmony_ci		 * and pos_ratio >> wb_pos_ratio. In the other words global
9918c2ecf20Sopenharmony_ci		 * state ("dirty") is not limiting factor and we have to
9928c2ecf20Sopenharmony_ci		 * make decision based on wb counters. But there is an
9938c2ecf20Sopenharmony_ci		 * important case when global pos_ratio should get precedence:
9948c2ecf20Sopenharmony_ci		 * global limits are exceeded (e.g. due to activities on other
9958c2ecf20Sopenharmony_ci		 * wb's) while given strictlimit wb is below limit.
9968c2ecf20Sopenharmony_ci		 *
9978c2ecf20Sopenharmony_ci		 * "pos_ratio * wb_pos_ratio" would work for the case above,
9988c2ecf20Sopenharmony_ci		 * but it would look too non-natural for the case of all
9998c2ecf20Sopenharmony_ci		 * activity in the system coming from a single strictlimit wb
10008c2ecf20Sopenharmony_ci		 * with bdi->max_ratio == 100%.
10018c2ecf20Sopenharmony_ci		 *
10028c2ecf20Sopenharmony_ci		 * Note that min() below somewhat changes the dynamics of the
10038c2ecf20Sopenharmony_ci		 * control system. Normally, pos_ratio value can be well over 3
10048c2ecf20Sopenharmony_ci		 * (when globally we are at freerun and wb is well below wb
10058c2ecf20Sopenharmony_ci		 * setpoint). Now the maximum pos_ratio in the same situation
10068c2ecf20Sopenharmony_ci		 * is 2. We might want to tweak this if we observe the control
10078c2ecf20Sopenharmony_ci		 * system is too slow to adapt.
10088c2ecf20Sopenharmony_ci		 */
10098c2ecf20Sopenharmony_ci		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
10108c2ecf20Sopenharmony_ci		return;
10118c2ecf20Sopenharmony_ci	}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci	/*
10148c2ecf20Sopenharmony_ci	 * We have computed basic pos_ratio above based on global situation. If
10158c2ecf20Sopenharmony_ci	 * the wb is over/under its share of dirty pages, we want to scale
10168c2ecf20Sopenharmony_ci	 * pos_ratio further down/up. That is done by the following mechanism.
10178c2ecf20Sopenharmony_ci	 */
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci	/*
10208c2ecf20Sopenharmony_ci	 * wb setpoint
10218c2ecf20Sopenharmony_ci	 *
10228c2ecf20Sopenharmony_ci	 *        f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint)
10238c2ecf20Sopenharmony_ci	 *
10248c2ecf20Sopenharmony_ci	 *                        x_intercept - wb_dirty
10258c2ecf20Sopenharmony_ci	 *                     := --------------------------
10268c2ecf20Sopenharmony_ci	 *                        x_intercept - wb_setpoint
10278c2ecf20Sopenharmony_ci	 *
10288c2ecf20Sopenharmony_ci	 * The main wb control line is a linear function that subjects to
10298c2ecf20Sopenharmony_ci	 *
10308c2ecf20Sopenharmony_ci	 * (1) f(wb_setpoint) = 1.0
10318c2ecf20Sopenharmony_ci	 * (2) k = - 1 / (8 * write_bw)  (in single wb case)
10328c2ecf20Sopenharmony_ci	 *     or equally: x_intercept = wb_setpoint + 8 * write_bw
10338c2ecf20Sopenharmony_ci	 *
10348c2ecf20Sopenharmony_ci	 * For single wb case, the dirty pages are observed to fluctuate
10358c2ecf20Sopenharmony_ci	 * regularly within range
10368c2ecf20Sopenharmony_ci	 *        [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2]
10378c2ecf20Sopenharmony_ci	 * for various filesystems, where (2) can yield in a reasonable 12.5%
10388c2ecf20Sopenharmony_ci	 * fluctuation range for pos_ratio.
10398c2ecf20Sopenharmony_ci	 *
10408c2ecf20Sopenharmony_ci	 * For JBOD case, wb_thresh (not wb_dirty!) could fluctuate up to its
10418c2ecf20Sopenharmony_ci	 * own size, so move the slope over accordingly and choose a slope that
10428c2ecf20Sopenharmony_ci	 * yields 100% pos_ratio fluctuation on suddenly doubled wb_thresh.
10438c2ecf20Sopenharmony_ci	 */
10448c2ecf20Sopenharmony_ci	if (unlikely(wb_thresh > dtc->thresh))
10458c2ecf20Sopenharmony_ci		wb_thresh = dtc->thresh;
10468c2ecf20Sopenharmony_ci	/*
10478c2ecf20Sopenharmony_ci	 * It's very possible that wb_thresh is close to 0 not because the
10488c2ecf20Sopenharmony_ci	 * device is slow, but that it has remained inactive for long time.
10498c2ecf20Sopenharmony_ci	 * Honour such devices a reasonable good (hopefully IO efficient)
10508c2ecf20Sopenharmony_ci	 * threshold, so that the occasional writes won't be blocked and active
10518c2ecf20Sopenharmony_ci	 * writes can rampup the threshold quickly.
10528c2ecf20Sopenharmony_ci	 */
10538c2ecf20Sopenharmony_ci	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
10548c2ecf20Sopenharmony_ci	/*
10558c2ecf20Sopenharmony_ci	 * scale global setpoint to wb's:
10568c2ecf20Sopenharmony_ci	 *	wb_setpoint = setpoint * wb_thresh / thresh
10578c2ecf20Sopenharmony_ci	 */
10588c2ecf20Sopenharmony_ci	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
10598c2ecf20Sopenharmony_ci	wb_setpoint = setpoint * (u64)x >> 16;
10608c2ecf20Sopenharmony_ci	/*
10618c2ecf20Sopenharmony_ci	 * Use span=(8*write_bw) in single wb case as indicated by
10628c2ecf20Sopenharmony_ci	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
10638c2ecf20Sopenharmony_ci	 *
10648c2ecf20Sopenharmony_ci	 *        wb_thresh                    thresh - wb_thresh
10658c2ecf20Sopenharmony_ci	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
10668c2ecf20Sopenharmony_ci	 *         thresh                           thresh
10678c2ecf20Sopenharmony_ci	 */
10688c2ecf20Sopenharmony_ci	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
10698c2ecf20Sopenharmony_ci	x_intercept = wb_setpoint + span;
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	if (dtc->wb_dirty < x_intercept - span / 4) {
10728c2ecf20Sopenharmony_ci		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
10738c2ecf20Sopenharmony_ci				      (x_intercept - wb_setpoint) | 1);
10748c2ecf20Sopenharmony_ci	} else
10758c2ecf20Sopenharmony_ci		pos_ratio /= 4;
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	/*
10788c2ecf20Sopenharmony_ci	 * wb reserve area, safeguard against dirty pool underrun and disk idle
10798c2ecf20Sopenharmony_ci	 * It may push the desired control point of global dirty pages higher
10808c2ecf20Sopenharmony_ci	 * than setpoint.
10818c2ecf20Sopenharmony_ci	 */
10828c2ecf20Sopenharmony_ci	x_intercept = wb_thresh / 2;
10838c2ecf20Sopenharmony_ci	if (dtc->wb_dirty < x_intercept) {
10848c2ecf20Sopenharmony_ci		if (dtc->wb_dirty > x_intercept / 8)
10858c2ecf20Sopenharmony_ci			pos_ratio = div_u64(pos_ratio * x_intercept,
10868c2ecf20Sopenharmony_ci					    dtc->wb_dirty);
10878c2ecf20Sopenharmony_ci		else
10888c2ecf20Sopenharmony_ci			pos_ratio *= 8;
10898c2ecf20Sopenharmony_ci	}
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	dtc->pos_ratio = pos_ratio;
10928c2ecf20Sopenharmony_ci}
10938c2ecf20Sopenharmony_ci
10948c2ecf20Sopenharmony_cistatic void wb_update_write_bandwidth(struct bdi_writeback *wb,
10958c2ecf20Sopenharmony_ci				      unsigned long elapsed,
10968c2ecf20Sopenharmony_ci				      unsigned long written)
10978c2ecf20Sopenharmony_ci{
10988c2ecf20Sopenharmony_ci	const unsigned long period = roundup_pow_of_two(3 * HZ);
10998c2ecf20Sopenharmony_ci	unsigned long avg = wb->avg_write_bandwidth;
11008c2ecf20Sopenharmony_ci	unsigned long old = wb->write_bandwidth;
11018c2ecf20Sopenharmony_ci	u64 bw;
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	/*
11048c2ecf20Sopenharmony_ci	 * bw = written * HZ / elapsed
11058c2ecf20Sopenharmony_ci	 *
11068c2ecf20Sopenharmony_ci	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
11078c2ecf20Sopenharmony_ci	 * write_bandwidth = ---------------------------------------------------
11088c2ecf20Sopenharmony_ci	 *                                          period
11098c2ecf20Sopenharmony_ci	 *
11108c2ecf20Sopenharmony_ci	 * @written may have decreased due to account_page_redirty().
11118c2ecf20Sopenharmony_ci	 * Avoid underflowing @bw calculation.
11128c2ecf20Sopenharmony_ci	 */
11138c2ecf20Sopenharmony_ci	bw = written - min(written, wb->written_stamp);
11148c2ecf20Sopenharmony_ci	bw *= HZ;
11158c2ecf20Sopenharmony_ci	if (unlikely(elapsed > period)) {
11168c2ecf20Sopenharmony_ci		bw = div64_ul(bw, elapsed);
11178c2ecf20Sopenharmony_ci		avg = bw;
11188c2ecf20Sopenharmony_ci		goto out;
11198c2ecf20Sopenharmony_ci	}
11208c2ecf20Sopenharmony_ci	bw += (u64)wb->write_bandwidth * (period - elapsed);
11218c2ecf20Sopenharmony_ci	bw >>= ilog2(period);
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ci	/*
11248c2ecf20Sopenharmony_ci	 * one more level of smoothing, for filtering out sudden spikes
11258c2ecf20Sopenharmony_ci	 */
11268c2ecf20Sopenharmony_ci	if (avg > old && old >= (unsigned long)bw)
11278c2ecf20Sopenharmony_ci		avg -= (avg - old) >> 3;
11288c2ecf20Sopenharmony_ci
11298c2ecf20Sopenharmony_ci	if (avg < old && old <= (unsigned long)bw)
11308c2ecf20Sopenharmony_ci		avg += (old - avg) >> 3;
11318c2ecf20Sopenharmony_ci
11328c2ecf20Sopenharmony_ciout:
11338c2ecf20Sopenharmony_ci	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
11348c2ecf20Sopenharmony_ci	avg = max(avg, 1LU);
11358c2ecf20Sopenharmony_ci	if (wb_has_dirty_io(wb)) {
11368c2ecf20Sopenharmony_ci		long delta = avg - wb->avg_write_bandwidth;
11378c2ecf20Sopenharmony_ci		WARN_ON_ONCE(atomic_long_add_return(delta,
11388c2ecf20Sopenharmony_ci					&wb->bdi->tot_write_bandwidth) <= 0);
11398c2ecf20Sopenharmony_ci	}
11408c2ecf20Sopenharmony_ci	wb->write_bandwidth = bw;
11418c2ecf20Sopenharmony_ci	wb->avg_write_bandwidth = avg;
11428c2ecf20Sopenharmony_ci}
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_cistatic void update_dirty_limit(struct dirty_throttle_control *dtc)
11458c2ecf20Sopenharmony_ci{
11468c2ecf20Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
11478c2ecf20Sopenharmony_ci	unsigned long thresh = dtc->thresh;
11488c2ecf20Sopenharmony_ci	unsigned long limit = dom->dirty_limit;
11498c2ecf20Sopenharmony_ci
11508c2ecf20Sopenharmony_ci	/*
11518c2ecf20Sopenharmony_ci	 * Follow up in one step.
11528c2ecf20Sopenharmony_ci	 */
11538c2ecf20Sopenharmony_ci	if (limit < thresh) {
11548c2ecf20Sopenharmony_ci		limit = thresh;
11558c2ecf20Sopenharmony_ci		goto update;
11568c2ecf20Sopenharmony_ci	}
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci	/*
11598c2ecf20Sopenharmony_ci	 * Follow down slowly. Use the higher one as the target, because thresh
11608c2ecf20Sopenharmony_ci	 * may drop below dirty. This is exactly the reason to introduce
11618c2ecf20Sopenharmony_ci	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
11628c2ecf20Sopenharmony_ci	 */
11638c2ecf20Sopenharmony_ci	thresh = max(thresh, dtc->dirty);
11648c2ecf20Sopenharmony_ci	if (limit > thresh) {
11658c2ecf20Sopenharmony_ci		limit -= (limit - thresh) >> 5;
11668c2ecf20Sopenharmony_ci		goto update;
11678c2ecf20Sopenharmony_ci	}
11688c2ecf20Sopenharmony_ci	return;
11698c2ecf20Sopenharmony_ciupdate:
11708c2ecf20Sopenharmony_ci	dom->dirty_limit = limit;
11718c2ecf20Sopenharmony_ci}
11728c2ecf20Sopenharmony_ci
11738c2ecf20Sopenharmony_cistatic void domain_update_bandwidth(struct dirty_throttle_control *dtc,
11748c2ecf20Sopenharmony_ci				    unsigned long now)
11758c2ecf20Sopenharmony_ci{
11768c2ecf20Sopenharmony_ci	struct wb_domain *dom = dtc_dom(dtc);
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_ci	/*
11798c2ecf20Sopenharmony_ci	 * check locklessly first to optimize away locking for the most time
11808c2ecf20Sopenharmony_ci	 */
11818c2ecf20Sopenharmony_ci	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
11828c2ecf20Sopenharmony_ci		return;
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci	spin_lock(&dom->lock);
11858c2ecf20Sopenharmony_ci	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
11868c2ecf20Sopenharmony_ci		update_dirty_limit(dtc);
11878c2ecf20Sopenharmony_ci		dom->dirty_limit_tstamp = now;
11888c2ecf20Sopenharmony_ci	}
11898c2ecf20Sopenharmony_ci	spin_unlock(&dom->lock);
11908c2ecf20Sopenharmony_ci}
11918c2ecf20Sopenharmony_ci
11928c2ecf20Sopenharmony_ci/*
11938c2ecf20Sopenharmony_ci * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
11948c2ecf20Sopenharmony_ci *
11958c2ecf20Sopenharmony_ci * Normal wb tasks will be curbed at or below it in long term.
11968c2ecf20Sopenharmony_ci * Obviously it should be around (write_bw / N) when there are N dd tasks.
11978c2ecf20Sopenharmony_ci */
11988c2ecf20Sopenharmony_cistatic void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
11998c2ecf20Sopenharmony_ci				      unsigned long dirtied,
12008c2ecf20Sopenharmony_ci				      unsigned long elapsed)
12018c2ecf20Sopenharmony_ci{
12028c2ecf20Sopenharmony_ci	struct bdi_writeback *wb = dtc->wb;
12038c2ecf20Sopenharmony_ci	unsigned long dirty = dtc->dirty;
12048c2ecf20Sopenharmony_ci	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
12058c2ecf20Sopenharmony_ci	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
12068c2ecf20Sopenharmony_ci	unsigned long setpoint = (freerun + limit) / 2;
12078c2ecf20Sopenharmony_ci	unsigned long write_bw = wb->avg_write_bandwidth;
12088c2ecf20Sopenharmony_ci	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
12098c2ecf20Sopenharmony_ci	unsigned long dirty_rate;
12108c2ecf20Sopenharmony_ci	unsigned long task_ratelimit;
12118c2ecf20Sopenharmony_ci	unsigned long balanced_dirty_ratelimit;
12128c2ecf20Sopenharmony_ci	unsigned long step;
12138c2ecf20Sopenharmony_ci	unsigned long x;
12148c2ecf20Sopenharmony_ci	unsigned long shift;
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci	/*
12178c2ecf20Sopenharmony_ci	 * The dirty rate will match the writeout rate in long term, except
12188c2ecf20Sopenharmony_ci	 * when dirty pages are truncated by userspace or re-dirtied by FS.
12198c2ecf20Sopenharmony_ci	 */
12208c2ecf20Sopenharmony_ci	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
12218c2ecf20Sopenharmony_ci
12228c2ecf20Sopenharmony_ci	/*
12238c2ecf20Sopenharmony_ci	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
12248c2ecf20Sopenharmony_ci	 */
12258c2ecf20Sopenharmony_ci	task_ratelimit = (u64)dirty_ratelimit *
12268c2ecf20Sopenharmony_ci					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
12278c2ecf20Sopenharmony_ci	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_ci	/*
12308c2ecf20Sopenharmony_ci	 * A linear estimation of the "balanced" throttle rate. The theory is,
12318c2ecf20Sopenharmony_ci	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
12328c2ecf20Sopenharmony_ci	 * dirty_rate will be measured to be (N * task_ratelimit). So the below
12338c2ecf20Sopenharmony_ci	 * formula will yield the balanced rate limit (write_bw / N).
12348c2ecf20Sopenharmony_ci	 *
12358c2ecf20Sopenharmony_ci	 * Note that the expanded form is not a pure rate feedback:
12368c2ecf20Sopenharmony_ci	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		     (1)
12378c2ecf20Sopenharmony_ci	 * but also takes pos_ratio into account:
12388c2ecf20Sopenharmony_ci	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
12398c2ecf20Sopenharmony_ci	 *
12408c2ecf20Sopenharmony_ci	 * (1) is not realistic because pos_ratio also takes part in balancing
12418c2ecf20Sopenharmony_ci	 * the dirty rate.  Consider the state
12428c2ecf20Sopenharmony_ci	 *	pos_ratio = 0.5						     (3)
12438c2ecf20Sopenharmony_ci	 *	rate = 2 * (write_bw / N)				     (4)
12448c2ecf20Sopenharmony_ci	 * If (1) is used, it will stuck in that state! Because each dd will
12458c2ecf20Sopenharmony_ci	 * be throttled at
12468c2ecf20Sopenharmony_ci	 *	task_ratelimit = pos_ratio * rate = (write_bw / N)	     (5)
12478c2ecf20Sopenharmony_ci	 * yielding
12488c2ecf20Sopenharmony_ci	 *	dirty_rate = N * task_ratelimit = write_bw		     (6)
12498c2ecf20Sopenharmony_ci	 * put (6) into (1) we get
12508c2ecf20Sopenharmony_ci	 *	rate_(i+1) = rate_(i)					     (7)
12518c2ecf20Sopenharmony_ci	 *
12528c2ecf20Sopenharmony_ci	 * So we end up using (2) to always keep
12538c2ecf20Sopenharmony_ci	 *	rate_(i+1) ~= (write_bw / N)				     (8)
12548c2ecf20Sopenharmony_ci	 * regardless of the value of pos_ratio. As long as (8) is satisfied,
12558c2ecf20Sopenharmony_ci	 * pos_ratio is able to drive itself to 1.0, which is not only where
12568c2ecf20Sopenharmony_ci	 * the dirty count meet the setpoint, but also where the slope of
12578c2ecf20Sopenharmony_ci	 * pos_ratio is most flat and hence task_ratelimit is least fluctuated.
12588c2ecf20Sopenharmony_ci	 */
12598c2ecf20Sopenharmony_ci	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
12608c2ecf20Sopenharmony_ci					   dirty_rate | 1);
12618c2ecf20Sopenharmony_ci	/*
12628c2ecf20Sopenharmony_ci	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
12638c2ecf20Sopenharmony_ci	 */
12648c2ecf20Sopenharmony_ci	if (unlikely(balanced_dirty_ratelimit > write_bw))
12658c2ecf20Sopenharmony_ci		balanced_dirty_ratelimit = write_bw;
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci	/*
12688c2ecf20Sopenharmony_ci	 * We could safely do this and return immediately:
12698c2ecf20Sopenharmony_ci	 *
12708c2ecf20Sopenharmony_ci	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
12718c2ecf20Sopenharmony_ci	 *
12728c2ecf20Sopenharmony_ci	 * However to get a more stable dirty_ratelimit, the below elaborated
12738c2ecf20Sopenharmony_ci	 * code makes use of task_ratelimit to filter out singular points and
12748c2ecf20Sopenharmony_ci	 * limit the step size.
12758c2ecf20Sopenharmony_ci	 *
12768c2ecf20Sopenharmony_ci	 * The below code essentially only uses the relative value of
12778c2ecf20Sopenharmony_ci	 *
12788c2ecf20Sopenharmony_ci	 *	task_ratelimit - dirty_ratelimit
12798c2ecf20Sopenharmony_ci	 *	= (pos_ratio - 1) * dirty_ratelimit
12808c2ecf20Sopenharmony_ci	 *
12818c2ecf20Sopenharmony_ci	 * which reflects the direction and size of dirty position error.
12828c2ecf20Sopenharmony_ci	 */
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	/*
12858c2ecf20Sopenharmony_ci	 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
12868c2ecf20Sopenharmony_ci	 * task_ratelimit is on the same side of dirty_ratelimit, too.
12878c2ecf20Sopenharmony_ci	 * For example, when
12888c2ecf20Sopenharmony_ci	 * - dirty_ratelimit > balanced_dirty_ratelimit
12898c2ecf20Sopenharmony_ci	 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
12908c2ecf20Sopenharmony_ci	 * lowering dirty_ratelimit will help meet both the position and rate
12918c2ecf20Sopenharmony_ci	 * control targets. Otherwise, don't update dirty_ratelimit if it will
12928c2ecf20Sopenharmony_ci	 * only help meet the rate target. After all, what the users ultimately
12938c2ecf20Sopenharmony_ci	 * feel and care are stable dirty rate and small position error.
12948c2ecf20Sopenharmony_ci	 *
12958c2ecf20Sopenharmony_ci	 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
12968c2ecf20Sopenharmony_ci	 * and filter out the singular points of balanced_dirty_ratelimit. Which
12978c2ecf20Sopenharmony_ci	 * keeps jumping around randomly and can even leap far away at times
12988c2ecf20Sopenharmony_ci	 * due to the small 200ms estimation period of dirty_rate (we want to
12998c2ecf20Sopenharmony_ci	 * keep that period small to reduce time lags).
13008c2ecf20Sopenharmony_ci	 */
13018c2ecf20Sopenharmony_ci	step = 0;
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci	/*
13048c2ecf20Sopenharmony_ci	 * For strictlimit case, calculations above were based on wb counters
13058c2ecf20Sopenharmony_ci	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
13068c2ecf20Sopenharmony_ci	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
13078c2ecf20Sopenharmony_ci	 * Hence, to calculate "step" properly, we have to use wb_dirty as
13088c2ecf20Sopenharmony_ci	 * "dirty" and wb_setpoint as "setpoint".
13098c2ecf20Sopenharmony_ci	 *
13108c2ecf20Sopenharmony_ci	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
13118c2ecf20Sopenharmony_ci	 * it's possible that wb_thresh is close to zero due to inactivity
13128c2ecf20Sopenharmony_ci	 * of backing device.
13138c2ecf20Sopenharmony_ci	 */
13148c2ecf20Sopenharmony_ci	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
13158c2ecf20Sopenharmony_ci		dirty = dtc->wb_dirty;
13168c2ecf20Sopenharmony_ci		if (dtc->wb_dirty < 8)
13178c2ecf20Sopenharmony_ci			setpoint = dtc->wb_dirty + 1;
13188c2ecf20Sopenharmony_ci		else
13198c2ecf20Sopenharmony_ci			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
13208c2ecf20Sopenharmony_ci	}
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci	if (dirty < setpoint) {
13238c2ecf20Sopenharmony_ci		x = min3(wb->balanced_dirty_ratelimit,
13248c2ecf20Sopenharmony_ci			 balanced_dirty_ratelimit, task_ratelimit);
13258c2ecf20Sopenharmony_ci		if (dirty_ratelimit < x)
13268c2ecf20Sopenharmony_ci			step = x - dirty_ratelimit;
13278c2ecf20Sopenharmony_ci	} else {
13288c2ecf20Sopenharmony_ci		x = max3(wb->balanced_dirty_ratelimit,
13298c2ecf20Sopenharmony_ci			 balanced_dirty_ratelimit, task_ratelimit);
13308c2ecf20Sopenharmony_ci		if (dirty_ratelimit > x)
13318c2ecf20Sopenharmony_ci			step = dirty_ratelimit - x;
13328c2ecf20Sopenharmony_ci	}
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	/*
13358c2ecf20Sopenharmony_ci	 * Don't pursue 100% rate matching. It's impossible since the balanced
13368c2ecf20Sopenharmony_ci	 * rate itself is constantly fluctuating. So decrease the track speed
13378c2ecf20Sopenharmony_ci	 * when it gets close to the target. Helps eliminate pointless tremors.
13388c2ecf20Sopenharmony_ci	 */
13398c2ecf20Sopenharmony_ci	shift = dirty_ratelimit / (2 * step + 1);
13408c2ecf20Sopenharmony_ci	if (shift < BITS_PER_LONG)
13418c2ecf20Sopenharmony_ci		step = DIV_ROUND_UP(step >> shift, 8);
13428c2ecf20Sopenharmony_ci	else
13438c2ecf20Sopenharmony_ci		step = 0;
13448c2ecf20Sopenharmony_ci
13458c2ecf20Sopenharmony_ci	if (dirty_ratelimit < balanced_dirty_ratelimit)
13468c2ecf20Sopenharmony_ci		dirty_ratelimit += step;
13478c2ecf20Sopenharmony_ci	else
13488c2ecf20Sopenharmony_ci		dirty_ratelimit -= step;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
13518c2ecf20Sopenharmony_ci	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
13528c2ecf20Sopenharmony_ci
13538c2ecf20Sopenharmony_ci	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
13548c2ecf20Sopenharmony_ci}
13558c2ecf20Sopenharmony_ci
13568c2ecf20Sopenharmony_cistatic void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
13578c2ecf20Sopenharmony_ci				  struct dirty_throttle_control *mdtc,
13588c2ecf20Sopenharmony_ci				  unsigned long start_time,
13598c2ecf20Sopenharmony_ci				  bool update_ratelimit)
13608c2ecf20Sopenharmony_ci{
13618c2ecf20Sopenharmony_ci	struct bdi_writeback *wb = gdtc->wb;
13628c2ecf20Sopenharmony_ci	unsigned long now = jiffies;
13638c2ecf20Sopenharmony_ci	unsigned long elapsed = now - wb->bw_time_stamp;
13648c2ecf20Sopenharmony_ci	unsigned long dirtied;
13658c2ecf20Sopenharmony_ci	unsigned long written;
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_ci	lockdep_assert_held(&wb->list_lock);
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci	/*
13708c2ecf20Sopenharmony_ci	 * rate-limit, only update once every 200ms.
13718c2ecf20Sopenharmony_ci	 */
13728c2ecf20Sopenharmony_ci	if (elapsed < BANDWIDTH_INTERVAL)
13738c2ecf20Sopenharmony_ci		return;
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
13768c2ecf20Sopenharmony_ci	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_ci	/*
13798c2ecf20Sopenharmony_ci	 * Skip quiet periods when disk bandwidth is under-utilized.
13808c2ecf20Sopenharmony_ci	 * (at least 1s idle time between two flusher runs)
13818c2ecf20Sopenharmony_ci	 */
13828c2ecf20Sopenharmony_ci	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
13838c2ecf20Sopenharmony_ci		goto snapshot;
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci	if (update_ratelimit) {
13868c2ecf20Sopenharmony_ci		domain_update_bandwidth(gdtc, now);
13878c2ecf20Sopenharmony_ci		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci		/*
13908c2ecf20Sopenharmony_ci		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
13918c2ecf20Sopenharmony_ci		 * compiler has no way to figure that out.  Help it.
13928c2ecf20Sopenharmony_ci		 */
13938c2ecf20Sopenharmony_ci		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
13948c2ecf20Sopenharmony_ci			domain_update_bandwidth(mdtc, now);
13958c2ecf20Sopenharmony_ci			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
13968c2ecf20Sopenharmony_ci		}
13978c2ecf20Sopenharmony_ci	}
13988c2ecf20Sopenharmony_ci	wb_update_write_bandwidth(wb, elapsed, written);
13998c2ecf20Sopenharmony_ci
14008c2ecf20Sopenharmony_cisnapshot:
14018c2ecf20Sopenharmony_ci	wb->dirtied_stamp = dirtied;
14028c2ecf20Sopenharmony_ci	wb->written_stamp = written;
14038c2ecf20Sopenharmony_ci	wb->bw_time_stamp = now;
14048c2ecf20Sopenharmony_ci}
14058c2ecf20Sopenharmony_ci
14068c2ecf20Sopenharmony_civoid wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
14078c2ecf20Sopenharmony_ci{
14088c2ecf20Sopenharmony_ci	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
14118c2ecf20Sopenharmony_ci}
14128c2ecf20Sopenharmony_ci
14138c2ecf20Sopenharmony_ci/*
14148c2ecf20Sopenharmony_ci * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
14158c2ecf20Sopenharmony_ci * will look to see if it needs to start dirty throttling.
14168c2ecf20Sopenharmony_ci *
14178c2ecf20Sopenharmony_ci * If dirty_poll_interval is too low, big NUMA machines will call the expensive
14188c2ecf20Sopenharmony_ci * global_zone_page_state() too often. So scale it near-sqrt to the safety margin
14198c2ecf20Sopenharmony_ci * (the number of pages we may dirty without exceeding the dirty limits).
14208c2ecf20Sopenharmony_ci */
14218c2ecf20Sopenharmony_cistatic unsigned long dirty_poll_interval(unsigned long dirty,
14228c2ecf20Sopenharmony_ci					 unsigned long thresh)
14238c2ecf20Sopenharmony_ci{
14248c2ecf20Sopenharmony_ci	if (thresh > dirty)
14258c2ecf20Sopenharmony_ci		return 1UL << (ilog2(thresh - dirty) >> 1);
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci	return 1;
14288c2ecf20Sopenharmony_ci}
14298c2ecf20Sopenharmony_ci
14308c2ecf20Sopenharmony_cistatic unsigned long wb_max_pause(struct bdi_writeback *wb,
14318c2ecf20Sopenharmony_ci				  unsigned long wb_dirty)
14328c2ecf20Sopenharmony_ci{
14338c2ecf20Sopenharmony_ci	unsigned long bw = wb->avg_write_bandwidth;
14348c2ecf20Sopenharmony_ci	unsigned long t;
14358c2ecf20Sopenharmony_ci
14368c2ecf20Sopenharmony_ci	/*
14378c2ecf20Sopenharmony_ci	 * Limit pause time for small memory systems. If sleeping for too long
14388c2ecf20Sopenharmony_ci	 * time, a small pool of dirty/writeback pages may go empty and disk go
14398c2ecf20Sopenharmony_ci	 * idle.
14408c2ecf20Sopenharmony_ci	 *
14418c2ecf20Sopenharmony_ci	 * 8 serves as the safety ratio.
14428c2ecf20Sopenharmony_ci	 */
14438c2ecf20Sopenharmony_ci	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
14448c2ecf20Sopenharmony_ci	t++;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	return min_t(unsigned long, t, MAX_PAUSE);
14478c2ecf20Sopenharmony_ci}
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_cistatic long wb_min_pause(struct bdi_writeback *wb,
14508c2ecf20Sopenharmony_ci			 long max_pause,
14518c2ecf20Sopenharmony_ci			 unsigned long task_ratelimit,
14528c2ecf20Sopenharmony_ci			 unsigned long dirty_ratelimit,
14538c2ecf20Sopenharmony_ci			 int *nr_dirtied_pause)
14548c2ecf20Sopenharmony_ci{
14558c2ecf20Sopenharmony_ci	long hi = ilog2(wb->avg_write_bandwidth);
14568c2ecf20Sopenharmony_ci	long lo = ilog2(wb->dirty_ratelimit);
14578c2ecf20Sopenharmony_ci	long t;		/* target pause */
14588c2ecf20Sopenharmony_ci	long pause;	/* estimated next pause */
14598c2ecf20Sopenharmony_ci	int pages;	/* target nr_dirtied_pause */
14608c2ecf20Sopenharmony_ci
14618c2ecf20Sopenharmony_ci	/* target for 10ms pause on 1-dd case */
14628c2ecf20Sopenharmony_ci	t = max(1, HZ / 100);
14638c2ecf20Sopenharmony_ci
14648c2ecf20Sopenharmony_ci	/*
14658c2ecf20Sopenharmony_ci	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
14668c2ecf20Sopenharmony_ci	 * overheads.
14678c2ecf20Sopenharmony_ci	 *
14688c2ecf20Sopenharmony_ci	 * (N * 10ms) on 2^N concurrent tasks.
14698c2ecf20Sopenharmony_ci	 */
14708c2ecf20Sopenharmony_ci	if (hi > lo)
14718c2ecf20Sopenharmony_ci		t += (hi - lo) * (10 * HZ) / 1024;
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci	/*
14748c2ecf20Sopenharmony_ci	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
14758c2ecf20Sopenharmony_ci	 * on the much more stable dirty_ratelimit. However the next pause time
14768c2ecf20Sopenharmony_ci	 * will be computed based on task_ratelimit and the two rate limits may
14778c2ecf20Sopenharmony_ci	 * depart considerably at some time. Especially if task_ratelimit goes
14788c2ecf20Sopenharmony_ci	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
14798c2ecf20Sopenharmony_ci	 * pause time will be max_pause*2 _trimmed down_ to max_pause.  As a
14808c2ecf20Sopenharmony_ci	 * result task_ratelimit won't be executed faithfully, which could
14818c2ecf20Sopenharmony_ci	 * eventually bring down dirty_ratelimit.
14828c2ecf20Sopenharmony_ci	 *
14838c2ecf20Sopenharmony_ci	 * We apply two rules to fix it up:
14848c2ecf20Sopenharmony_ci	 * 1) try to estimate the next pause time and if necessary, use a lower
14858c2ecf20Sopenharmony_ci	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
14868c2ecf20Sopenharmony_ci	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
14878c2ecf20Sopenharmony_ci	 * 2) limit the target pause time to max_pause/2, so that the normal
14888c2ecf20Sopenharmony_ci	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
14898c2ecf20Sopenharmony_ci	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
14908c2ecf20Sopenharmony_ci	 */
14918c2ecf20Sopenharmony_ci	t = min(t, 1 + max_pause / 2);
14928c2ecf20Sopenharmony_ci	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	/*
14958c2ecf20Sopenharmony_ci	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
14968c2ecf20Sopenharmony_ci	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
14978c2ecf20Sopenharmony_ci	 * When the 16 consecutive reads are often interrupted by some dirty
14988c2ecf20Sopenharmony_ci	 * throttling pause during the async writes, cfq will go into idles
14998c2ecf20Sopenharmony_ci	 * (deadline is fine). So push nr_dirtied_pause as high as possible
15008c2ecf20Sopenharmony_ci	 * until reaches DIRTY_POLL_THRESH=32 pages.
15018c2ecf20Sopenharmony_ci	 */
15028c2ecf20Sopenharmony_ci	if (pages < DIRTY_POLL_THRESH) {
15038c2ecf20Sopenharmony_ci		t = max_pause;
15048c2ecf20Sopenharmony_ci		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
15058c2ecf20Sopenharmony_ci		if (pages > DIRTY_POLL_THRESH) {
15068c2ecf20Sopenharmony_ci			pages = DIRTY_POLL_THRESH;
15078c2ecf20Sopenharmony_ci			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
15088c2ecf20Sopenharmony_ci		}
15098c2ecf20Sopenharmony_ci	}
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci	pause = HZ * pages / (task_ratelimit + 1);
15128c2ecf20Sopenharmony_ci	if (pause > max_pause) {
15138c2ecf20Sopenharmony_ci		t = max_pause;
15148c2ecf20Sopenharmony_ci		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
15158c2ecf20Sopenharmony_ci	}
15168c2ecf20Sopenharmony_ci
15178c2ecf20Sopenharmony_ci	*nr_dirtied_pause = pages;
15188c2ecf20Sopenharmony_ci	/*
15198c2ecf20Sopenharmony_ci	 * The minimal pause time will normally be half the target pause time.
15208c2ecf20Sopenharmony_ci	 */
15218c2ecf20Sopenharmony_ci	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
15228c2ecf20Sopenharmony_ci}
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_cistatic inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
15258c2ecf20Sopenharmony_ci{
15268c2ecf20Sopenharmony_ci	struct bdi_writeback *wb = dtc->wb;
15278c2ecf20Sopenharmony_ci	unsigned long wb_reclaimable;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci	/*
15308c2ecf20Sopenharmony_ci	 * wb_thresh is not treated as some limiting factor as
15318c2ecf20Sopenharmony_ci	 * dirty_thresh, due to reasons
15328c2ecf20Sopenharmony_ci	 * - in JBOD setup, wb_thresh can fluctuate a lot
15338c2ecf20Sopenharmony_ci	 * - in a system with HDD and USB key, the USB key may somehow
15348c2ecf20Sopenharmony_ci	 *   go into state (wb_dirty >> wb_thresh) either because
15358c2ecf20Sopenharmony_ci	 *   wb_dirty starts high, or because wb_thresh drops low.
15368c2ecf20Sopenharmony_ci	 *   In this case we don't want to hard throttle the USB key
15378c2ecf20Sopenharmony_ci	 *   dirtiers for 100 seconds until wb_dirty drops under
15388c2ecf20Sopenharmony_ci	 *   wb_thresh. Instead the auxiliary wb control line in
15398c2ecf20Sopenharmony_ci	 *   wb_position_ratio() will let the dirtier task progress
15408c2ecf20Sopenharmony_ci	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
15418c2ecf20Sopenharmony_ci	 */
15428c2ecf20Sopenharmony_ci	dtc->wb_thresh = __wb_calc_thresh(dtc);
15438c2ecf20Sopenharmony_ci	dtc->wb_bg_thresh = dtc->thresh ?
15448c2ecf20Sopenharmony_ci		div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
15458c2ecf20Sopenharmony_ci
15468c2ecf20Sopenharmony_ci	/*
15478c2ecf20Sopenharmony_ci	 * In order to avoid the stacked BDI deadlock we need
15488c2ecf20Sopenharmony_ci	 * to ensure we accurately count the 'dirty' pages when
15498c2ecf20Sopenharmony_ci	 * the threshold is low.
15508c2ecf20Sopenharmony_ci	 *
15518c2ecf20Sopenharmony_ci	 * Otherwise it would be possible to get thresh+n pages
15528c2ecf20Sopenharmony_ci	 * reported dirty, even though there are thresh-m pages
15538c2ecf20Sopenharmony_ci	 * actually dirty; with m+n sitting in the percpu
15548c2ecf20Sopenharmony_ci	 * deltas.
15558c2ecf20Sopenharmony_ci	 */
15568c2ecf20Sopenharmony_ci	if (dtc->wb_thresh < 2 * wb_stat_error()) {
15578c2ecf20Sopenharmony_ci		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
15588c2ecf20Sopenharmony_ci		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
15598c2ecf20Sopenharmony_ci	} else {
15608c2ecf20Sopenharmony_ci		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
15618c2ecf20Sopenharmony_ci		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
15628c2ecf20Sopenharmony_ci	}
15638c2ecf20Sopenharmony_ci}
15648c2ecf20Sopenharmony_ci
15658c2ecf20Sopenharmony_ci/*
15668c2ecf20Sopenharmony_ci * balance_dirty_pages() must be called by processes which are generating dirty
15678c2ecf20Sopenharmony_ci * data.  It looks at the number of dirty pages in the machine and will force
15688c2ecf20Sopenharmony_ci * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
15698c2ecf20Sopenharmony_ci * If we're over `background_thresh' then the writeback threads are woken to
15708c2ecf20Sopenharmony_ci * perform some writeout.
15718c2ecf20Sopenharmony_ci */
15728c2ecf20Sopenharmony_cistatic void balance_dirty_pages(struct bdi_writeback *wb,
15738c2ecf20Sopenharmony_ci				unsigned long pages_dirtied)
15748c2ecf20Sopenharmony_ci{
15758c2ecf20Sopenharmony_ci	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
15768c2ecf20Sopenharmony_ci	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
15778c2ecf20Sopenharmony_ci	struct dirty_throttle_control * const gdtc = &gdtc_stor;
15788c2ecf20Sopenharmony_ci	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
15798c2ecf20Sopenharmony_ci						     &mdtc_stor : NULL;
15808c2ecf20Sopenharmony_ci	struct dirty_throttle_control *sdtc;
15818c2ecf20Sopenharmony_ci	unsigned long nr_reclaimable;	/* = file_dirty */
15828c2ecf20Sopenharmony_ci	long period;
15838c2ecf20Sopenharmony_ci	long pause;
15848c2ecf20Sopenharmony_ci	long max_pause;
15858c2ecf20Sopenharmony_ci	long min_pause;
15868c2ecf20Sopenharmony_ci	int nr_dirtied_pause;
15878c2ecf20Sopenharmony_ci	bool dirty_exceeded = false;
15888c2ecf20Sopenharmony_ci	unsigned long task_ratelimit;
15898c2ecf20Sopenharmony_ci	unsigned long dirty_ratelimit;
15908c2ecf20Sopenharmony_ci	struct backing_dev_info *bdi = wb->bdi;
15918c2ecf20Sopenharmony_ci	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
15928c2ecf20Sopenharmony_ci	unsigned long start_time = jiffies;
15938c2ecf20Sopenharmony_ci
15948c2ecf20Sopenharmony_ci	for (;;) {
15958c2ecf20Sopenharmony_ci		unsigned long now = jiffies;
15968c2ecf20Sopenharmony_ci		unsigned long dirty, thresh, bg_thresh;
15978c2ecf20Sopenharmony_ci		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
15988c2ecf20Sopenharmony_ci		unsigned long m_thresh = 0;
15998c2ecf20Sopenharmony_ci		unsigned long m_bg_thresh = 0;
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
16028c2ecf20Sopenharmony_ci		gdtc->avail = global_dirtyable_memory();
16038c2ecf20Sopenharmony_ci		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci		domain_dirty_limits(gdtc);
16068c2ecf20Sopenharmony_ci
16078c2ecf20Sopenharmony_ci		if (unlikely(strictlimit)) {
16088c2ecf20Sopenharmony_ci			wb_dirty_limits(gdtc);
16098c2ecf20Sopenharmony_ci
16108c2ecf20Sopenharmony_ci			dirty = gdtc->wb_dirty;
16118c2ecf20Sopenharmony_ci			thresh = gdtc->wb_thresh;
16128c2ecf20Sopenharmony_ci			bg_thresh = gdtc->wb_bg_thresh;
16138c2ecf20Sopenharmony_ci		} else {
16148c2ecf20Sopenharmony_ci			dirty = gdtc->dirty;
16158c2ecf20Sopenharmony_ci			thresh = gdtc->thresh;
16168c2ecf20Sopenharmony_ci			bg_thresh = gdtc->bg_thresh;
16178c2ecf20Sopenharmony_ci		}
16188c2ecf20Sopenharmony_ci
16198c2ecf20Sopenharmony_ci		if (mdtc) {
16208c2ecf20Sopenharmony_ci			unsigned long filepages, headroom, writeback;
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci			/*
16238c2ecf20Sopenharmony_ci			 * If @wb belongs to !root memcg, repeat the same
16248c2ecf20Sopenharmony_ci			 * basic calculations for the memcg domain.
16258c2ecf20Sopenharmony_ci			 */
16268c2ecf20Sopenharmony_ci			mem_cgroup_wb_stats(wb, &filepages, &headroom,
16278c2ecf20Sopenharmony_ci					    &mdtc->dirty, &writeback);
16288c2ecf20Sopenharmony_ci			mdtc->dirty += writeback;
16298c2ecf20Sopenharmony_ci			mdtc_calc_avail(mdtc, filepages, headroom);
16308c2ecf20Sopenharmony_ci
16318c2ecf20Sopenharmony_ci			domain_dirty_limits(mdtc);
16328c2ecf20Sopenharmony_ci
16338c2ecf20Sopenharmony_ci			if (unlikely(strictlimit)) {
16348c2ecf20Sopenharmony_ci				wb_dirty_limits(mdtc);
16358c2ecf20Sopenharmony_ci				m_dirty = mdtc->wb_dirty;
16368c2ecf20Sopenharmony_ci				m_thresh = mdtc->wb_thresh;
16378c2ecf20Sopenharmony_ci				m_bg_thresh = mdtc->wb_bg_thresh;
16388c2ecf20Sopenharmony_ci			} else {
16398c2ecf20Sopenharmony_ci				m_dirty = mdtc->dirty;
16408c2ecf20Sopenharmony_ci				m_thresh = mdtc->thresh;
16418c2ecf20Sopenharmony_ci				m_bg_thresh = mdtc->bg_thresh;
16428c2ecf20Sopenharmony_ci			}
16438c2ecf20Sopenharmony_ci		}
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci		/*
16468c2ecf20Sopenharmony_ci		 * Throttle it only when the background writeback cannot
16478c2ecf20Sopenharmony_ci		 * catch-up. This avoids (excessively) small writeouts
16488c2ecf20Sopenharmony_ci		 * when the wb limits are ramping up in case of !strictlimit.
16498c2ecf20Sopenharmony_ci		 *
16508c2ecf20Sopenharmony_ci		 * In strictlimit case make decision based on the wb counters
16518c2ecf20Sopenharmony_ci		 * and limits. Small writeouts when the wb limits are ramping
16528c2ecf20Sopenharmony_ci		 * up are the price we consciously pay for strictlimit-ing.
16538c2ecf20Sopenharmony_ci		 *
16548c2ecf20Sopenharmony_ci		 * If memcg domain is in effect, @dirty should be under
16558c2ecf20Sopenharmony_ci		 * both global and memcg freerun ceilings.
16568c2ecf20Sopenharmony_ci		 */
16578c2ecf20Sopenharmony_ci		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
16588c2ecf20Sopenharmony_ci		    (!mdtc ||
16598c2ecf20Sopenharmony_ci		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
16608c2ecf20Sopenharmony_ci			unsigned long intv;
16618c2ecf20Sopenharmony_ci			unsigned long m_intv;
16628c2ecf20Sopenharmony_ci
16638c2ecf20Sopenharmony_cifree_running:
16648c2ecf20Sopenharmony_ci			intv = dirty_poll_interval(dirty, thresh);
16658c2ecf20Sopenharmony_ci			m_intv = ULONG_MAX;
16668c2ecf20Sopenharmony_ci
16678c2ecf20Sopenharmony_ci			current->dirty_paused_when = now;
16688c2ecf20Sopenharmony_ci			current->nr_dirtied = 0;
16698c2ecf20Sopenharmony_ci			if (mdtc)
16708c2ecf20Sopenharmony_ci				m_intv = dirty_poll_interval(m_dirty, m_thresh);
16718c2ecf20Sopenharmony_ci			current->nr_dirtied_pause = min(intv, m_intv);
16728c2ecf20Sopenharmony_ci			break;
16738c2ecf20Sopenharmony_ci		}
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci		if (unlikely(!writeback_in_progress(wb)))
16768c2ecf20Sopenharmony_ci			wb_start_background_writeback(wb);
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci		mem_cgroup_flush_foreign(wb);
16798c2ecf20Sopenharmony_ci
16808c2ecf20Sopenharmony_ci		/*
16818c2ecf20Sopenharmony_ci		 * Calculate global domain's pos_ratio and select the
16828c2ecf20Sopenharmony_ci		 * global dtc by default.
16838c2ecf20Sopenharmony_ci		 */
16848c2ecf20Sopenharmony_ci		if (!strictlimit) {
16858c2ecf20Sopenharmony_ci			wb_dirty_limits(gdtc);
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci			if ((current->flags & PF_LOCAL_THROTTLE) &&
16888c2ecf20Sopenharmony_ci			    gdtc->wb_dirty <
16898c2ecf20Sopenharmony_ci			    dirty_freerun_ceiling(gdtc->wb_thresh,
16908c2ecf20Sopenharmony_ci						  gdtc->wb_bg_thresh))
16918c2ecf20Sopenharmony_ci				/*
16928c2ecf20Sopenharmony_ci				 * LOCAL_THROTTLE tasks must not be throttled
16938c2ecf20Sopenharmony_ci				 * when below the per-wb freerun ceiling.
16948c2ecf20Sopenharmony_ci				 */
16958c2ecf20Sopenharmony_ci				goto free_running;
16968c2ecf20Sopenharmony_ci		}
16978c2ecf20Sopenharmony_ci
16988c2ecf20Sopenharmony_ci		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
16998c2ecf20Sopenharmony_ci			((gdtc->dirty > gdtc->thresh) || strictlimit);
17008c2ecf20Sopenharmony_ci
17018c2ecf20Sopenharmony_ci		wb_position_ratio(gdtc);
17028c2ecf20Sopenharmony_ci		sdtc = gdtc;
17038c2ecf20Sopenharmony_ci
17048c2ecf20Sopenharmony_ci		if (mdtc) {
17058c2ecf20Sopenharmony_ci			/*
17068c2ecf20Sopenharmony_ci			 * If memcg domain is in effect, calculate its
17078c2ecf20Sopenharmony_ci			 * pos_ratio.  @wb should satisfy constraints from
17088c2ecf20Sopenharmony_ci			 * both global and memcg domains.  Choose the one
17098c2ecf20Sopenharmony_ci			 * w/ lower pos_ratio.
17108c2ecf20Sopenharmony_ci			 */
17118c2ecf20Sopenharmony_ci			if (!strictlimit) {
17128c2ecf20Sopenharmony_ci				wb_dirty_limits(mdtc);
17138c2ecf20Sopenharmony_ci
17148c2ecf20Sopenharmony_ci				if ((current->flags & PF_LOCAL_THROTTLE) &&
17158c2ecf20Sopenharmony_ci				    mdtc->wb_dirty <
17168c2ecf20Sopenharmony_ci				    dirty_freerun_ceiling(mdtc->wb_thresh,
17178c2ecf20Sopenharmony_ci							  mdtc->wb_bg_thresh))
17188c2ecf20Sopenharmony_ci					/*
17198c2ecf20Sopenharmony_ci					 * LOCAL_THROTTLE tasks must not be
17208c2ecf20Sopenharmony_ci					 * throttled when below the per-wb
17218c2ecf20Sopenharmony_ci					 * freerun ceiling.
17228c2ecf20Sopenharmony_ci					 */
17238c2ecf20Sopenharmony_ci					goto free_running;
17248c2ecf20Sopenharmony_ci			}
17258c2ecf20Sopenharmony_ci			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
17268c2ecf20Sopenharmony_ci				((mdtc->dirty > mdtc->thresh) || strictlimit);
17278c2ecf20Sopenharmony_ci
17288c2ecf20Sopenharmony_ci			wb_position_ratio(mdtc);
17298c2ecf20Sopenharmony_ci			if (mdtc->pos_ratio < gdtc->pos_ratio)
17308c2ecf20Sopenharmony_ci				sdtc = mdtc;
17318c2ecf20Sopenharmony_ci		}
17328c2ecf20Sopenharmony_ci
17338c2ecf20Sopenharmony_ci		if (dirty_exceeded && !wb->dirty_exceeded)
17348c2ecf20Sopenharmony_ci			wb->dirty_exceeded = 1;
17358c2ecf20Sopenharmony_ci
17368c2ecf20Sopenharmony_ci		if (time_is_before_jiffies(wb->bw_time_stamp +
17378c2ecf20Sopenharmony_ci					   BANDWIDTH_INTERVAL)) {
17388c2ecf20Sopenharmony_ci			spin_lock(&wb->list_lock);
17398c2ecf20Sopenharmony_ci			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
17408c2ecf20Sopenharmony_ci			spin_unlock(&wb->list_lock);
17418c2ecf20Sopenharmony_ci		}
17428c2ecf20Sopenharmony_ci
17438c2ecf20Sopenharmony_ci		/* throttle according to the chosen dtc */
17448c2ecf20Sopenharmony_ci		dirty_ratelimit = wb->dirty_ratelimit;
17458c2ecf20Sopenharmony_ci		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
17468c2ecf20Sopenharmony_ci							RATELIMIT_CALC_SHIFT;
17478c2ecf20Sopenharmony_ci		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
17488c2ecf20Sopenharmony_ci		min_pause = wb_min_pause(wb, max_pause,
17498c2ecf20Sopenharmony_ci					 task_ratelimit, dirty_ratelimit,
17508c2ecf20Sopenharmony_ci					 &nr_dirtied_pause);
17518c2ecf20Sopenharmony_ci
17528c2ecf20Sopenharmony_ci		if (unlikely(task_ratelimit == 0)) {
17538c2ecf20Sopenharmony_ci			period = max_pause;
17548c2ecf20Sopenharmony_ci			pause = max_pause;
17558c2ecf20Sopenharmony_ci			goto pause;
17568c2ecf20Sopenharmony_ci		}
17578c2ecf20Sopenharmony_ci		period = HZ * pages_dirtied / task_ratelimit;
17588c2ecf20Sopenharmony_ci		pause = period;
17598c2ecf20Sopenharmony_ci		if (current->dirty_paused_when)
17608c2ecf20Sopenharmony_ci			pause -= now - current->dirty_paused_when;
17618c2ecf20Sopenharmony_ci		/*
17628c2ecf20Sopenharmony_ci		 * For less than 1s think time (ext3/4 may block the dirtier
17638c2ecf20Sopenharmony_ci		 * for up to 800ms from time to time on 1-HDD; so does xfs,
17648c2ecf20Sopenharmony_ci		 * however at much less frequency), try to compensate it in
17658c2ecf20Sopenharmony_ci		 * future periods by updating the virtual time; otherwise just
17668c2ecf20Sopenharmony_ci		 * do a reset, as it may be a light dirtier.
17678c2ecf20Sopenharmony_ci		 */
17688c2ecf20Sopenharmony_ci		if (pause < min_pause) {
17698c2ecf20Sopenharmony_ci			trace_balance_dirty_pages(wb,
17708c2ecf20Sopenharmony_ci						  sdtc->thresh,
17718c2ecf20Sopenharmony_ci						  sdtc->bg_thresh,
17728c2ecf20Sopenharmony_ci						  sdtc->dirty,
17738c2ecf20Sopenharmony_ci						  sdtc->wb_thresh,
17748c2ecf20Sopenharmony_ci						  sdtc->wb_dirty,
17758c2ecf20Sopenharmony_ci						  dirty_ratelimit,
17768c2ecf20Sopenharmony_ci						  task_ratelimit,
17778c2ecf20Sopenharmony_ci						  pages_dirtied,
17788c2ecf20Sopenharmony_ci						  period,
17798c2ecf20Sopenharmony_ci						  min(pause, 0L),
17808c2ecf20Sopenharmony_ci						  start_time);
17818c2ecf20Sopenharmony_ci			if (pause < -HZ) {
17828c2ecf20Sopenharmony_ci				current->dirty_paused_when = now;
17838c2ecf20Sopenharmony_ci				current->nr_dirtied = 0;
17848c2ecf20Sopenharmony_ci			} else if (period) {
17858c2ecf20Sopenharmony_ci				current->dirty_paused_when += period;
17868c2ecf20Sopenharmony_ci				current->nr_dirtied = 0;
17878c2ecf20Sopenharmony_ci			} else if (current->nr_dirtied_pause <= pages_dirtied)
17888c2ecf20Sopenharmony_ci				current->nr_dirtied_pause += pages_dirtied;
17898c2ecf20Sopenharmony_ci			break;
17908c2ecf20Sopenharmony_ci		}
17918c2ecf20Sopenharmony_ci		if (unlikely(pause > max_pause)) {
17928c2ecf20Sopenharmony_ci			/* for occasional dropped task_ratelimit */
17938c2ecf20Sopenharmony_ci			now += min(pause - max_pause, max_pause);
17948c2ecf20Sopenharmony_ci			pause = max_pause;
17958c2ecf20Sopenharmony_ci		}
17968c2ecf20Sopenharmony_ci
17978c2ecf20Sopenharmony_cipause:
17988c2ecf20Sopenharmony_ci		trace_balance_dirty_pages(wb,
17998c2ecf20Sopenharmony_ci					  sdtc->thresh,
18008c2ecf20Sopenharmony_ci					  sdtc->bg_thresh,
18018c2ecf20Sopenharmony_ci					  sdtc->dirty,
18028c2ecf20Sopenharmony_ci					  sdtc->wb_thresh,
18038c2ecf20Sopenharmony_ci					  sdtc->wb_dirty,
18048c2ecf20Sopenharmony_ci					  dirty_ratelimit,
18058c2ecf20Sopenharmony_ci					  task_ratelimit,
18068c2ecf20Sopenharmony_ci					  pages_dirtied,
18078c2ecf20Sopenharmony_ci					  period,
18088c2ecf20Sopenharmony_ci					  pause,
18098c2ecf20Sopenharmony_ci					  start_time);
18108c2ecf20Sopenharmony_ci		__set_current_state(TASK_KILLABLE);
18118c2ecf20Sopenharmony_ci		wb->dirty_sleep = now;
18128c2ecf20Sopenharmony_ci		io_schedule_timeout(pause);
18138c2ecf20Sopenharmony_ci
18148c2ecf20Sopenharmony_ci		current->dirty_paused_when = now + pause;
18158c2ecf20Sopenharmony_ci		current->nr_dirtied = 0;
18168c2ecf20Sopenharmony_ci		current->nr_dirtied_pause = nr_dirtied_pause;
18178c2ecf20Sopenharmony_ci
18188c2ecf20Sopenharmony_ci		/*
18198c2ecf20Sopenharmony_ci		 * This is typically equal to (dirty < thresh) and can also
18208c2ecf20Sopenharmony_ci		 * keep "1000+ dd on a slow USB stick" under control.
18218c2ecf20Sopenharmony_ci		 */
18228c2ecf20Sopenharmony_ci		if (task_ratelimit)
18238c2ecf20Sopenharmony_ci			break;
18248c2ecf20Sopenharmony_ci
18258c2ecf20Sopenharmony_ci		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
18278c2ecf20Sopenharmony_ci		 * pages exceeds dirty_thresh, give the other good wb's a pipe
18288c2ecf20Sopenharmony_ci		 * to go through, so that tasks on them still remain responsive.
18298c2ecf20Sopenharmony_ci		 *
18308c2ecf20Sopenharmony_ci		 * In theory 1 page is enough to keep the consumer-producer
18318c2ecf20Sopenharmony_ci		 * pipe going: the flusher cleans 1 page => the task dirties 1
18328c2ecf20Sopenharmony_ci		 * more page. However wb_dirty has accounting errors.  So use
18338c2ecf20Sopenharmony_ci		 * the larger and more IO friendly wb_stat_error.
18348c2ecf20Sopenharmony_ci		 */
18358c2ecf20Sopenharmony_ci		if (sdtc->wb_dirty <= wb_stat_error())
18368c2ecf20Sopenharmony_ci			break;
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci		if (fatal_signal_pending(current))
18398c2ecf20Sopenharmony_ci			break;
18408c2ecf20Sopenharmony_ci	}
18418c2ecf20Sopenharmony_ci
18428c2ecf20Sopenharmony_ci	if (!dirty_exceeded && wb->dirty_exceeded)
18438c2ecf20Sopenharmony_ci		wb->dirty_exceeded = 0;
18448c2ecf20Sopenharmony_ci
18458c2ecf20Sopenharmony_ci	if (writeback_in_progress(wb))
18468c2ecf20Sopenharmony_ci		return;
18478c2ecf20Sopenharmony_ci
18488c2ecf20Sopenharmony_ci	/*
18498c2ecf20Sopenharmony_ci	 * In laptop mode, we wait until hitting the higher threshold before
18508c2ecf20Sopenharmony_ci	 * starting background writeout, and then write out all the way down
18518c2ecf20Sopenharmony_ci	 * to the lower threshold.  So slow writers cause minimal disk activity.
18528c2ecf20Sopenharmony_ci	 *
18538c2ecf20Sopenharmony_ci	 * In normal mode, we start background writeout at the lower
18548c2ecf20Sopenharmony_ci	 * background_thresh, to keep the amount of dirty memory low.
18558c2ecf20Sopenharmony_ci	 */
18568c2ecf20Sopenharmony_ci	if (laptop_mode)
18578c2ecf20Sopenharmony_ci		return;
18588c2ecf20Sopenharmony_ci
18598c2ecf20Sopenharmony_ci	if (nr_reclaimable > gdtc->bg_thresh)
18608c2ecf20Sopenharmony_ci		wb_start_background_writeback(wb);
18618c2ecf20Sopenharmony_ci}
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(int, bdp_ratelimits);
18648c2ecf20Sopenharmony_ci
18658c2ecf20Sopenharmony_ci/*
18668c2ecf20Sopenharmony_ci * Normal tasks are throttled by
18678c2ecf20Sopenharmony_ci *	loop {
18688c2ecf20Sopenharmony_ci *		dirty tsk->nr_dirtied_pause pages;
18698c2ecf20Sopenharmony_ci *		take a snap in balance_dirty_pages();
18708c2ecf20Sopenharmony_ci *	}
 * However there is a worst case. If every task exits immediately after
 * dirtying (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never
 * be called to throttle the page dirties. The solution is to save the not yet
18748c2ecf20Sopenharmony_ci * throttled page dirties in dirty_throttle_leaks on task exit and charge them
18758c2ecf20Sopenharmony_ci * randomly into the running tasks. This works well for the above worst case,
18768c2ecf20Sopenharmony_ci * as the new task will pick up and accumulate the old task's leaked dirty
18778c2ecf20Sopenharmony_ci * count and eventually get throttled.
18788c2ecf20Sopenharmony_ci */
18798c2ecf20Sopenharmony_ciDEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
18808c2ecf20Sopenharmony_ci
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	/* Nothing to throttle against on bdis that don't do writeback. */
	if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
		return;

	/*
	 * With cgroup writeback enabled on this inode, throttle against the
	 * current task's wb; otherwise fall back to the bdi's embedded wb.
	 */
	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		/*
		 * Limits exceeded: force a check-in at least every 32KB worth
		 * of pages (PAGE_SHIFT - 10 converts the KB count to pages).
		 */
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial task->nr_dirtied_pause.
	 */
	p =  this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		/* This CPU accumulated too much; force this task to throttle. */
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
	 * the dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	/* Enough pages dirtied since the last check-in: go throttle. */
	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(wb, current->nr_dirtied);

	/* Drop the wb reference (paired with wb_get_create_current() above). */
	wb_put(wb);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ci/**
19508c2ecf20Sopenharmony_ci * wb_over_bg_thresh - does @wb need to be written back?
19518c2ecf20Sopenharmony_ci * @wb: bdi_writeback of interest
19528c2ecf20Sopenharmony_ci *
19538c2ecf20Sopenharmony_ci * Determines whether background writeback should keep writing @wb or it's
19548c2ecf20Sopenharmony_ci * clean enough.
19558c2ecf20Sopenharmony_ci *
19568c2ecf20Sopenharmony_ci * Return: %true if writeback should continue.
19578c2ecf20Sopenharmony_ci */
19588c2ecf20Sopenharmony_cibool wb_over_bg_thresh(struct bdi_writeback *wb)
19598c2ecf20Sopenharmony_ci{
19608c2ecf20Sopenharmony_ci	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
19618c2ecf20Sopenharmony_ci	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
19628c2ecf20Sopenharmony_ci	struct dirty_throttle_control * const gdtc = &gdtc_stor;
19638c2ecf20Sopenharmony_ci	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
19648c2ecf20Sopenharmony_ci						     &mdtc_stor : NULL;
19658c2ecf20Sopenharmony_ci
19668c2ecf20Sopenharmony_ci	/*
19678c2ecf20Sopenharmony_ci	 * Similar to balance_dirty_pages() but ignores pages being written
19688c2ecf20Sopenharmony_ci	 * as we're trying to decide whether to put more under writeback.
19698c2ecf20Sopenharmony_ci	 */
19708c2ecf20Sopenharmony_ci	gdtc->avail = global_dirtyable_memory();
19718c2ecf20Sopenharmony_ci	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
19728c2ecf20Sopenharmony_ci	domain_dirty_limits(gdtc);
19738c2ecf20Sopenharmony_ci
19748c2ecf20Sopenharmony_ci	if (gdtc->dirty > gdtc->bg_thresh)
19758c2ecf20Sopenharmony_ci		return true;
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	if (wb_stat(wb, WB_RECLAIMABLE) >
19788c2ecf20Sopenharmony_ci	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
19798c2ecf20Sopenharmony_ci		return true;
19808c2ecf20Sopenharmony_ci
19818c2ecf20Sopenharmony_ci	if (mdtc) {
19828c2ecf20Sopenharmony_ci		unsigned long filepages, headroom, writeback;
19838c2ecf20Sopenharmony_ci
19848c2ecf20Sopenharmony_ci		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
19858c2ecf20Sopenharmony_ci				    &writeback);
19868c2ecf20Sopenharmony_ci		mdtc_calc_avail(mdtc, filepages, headroom);
19878c2ecf20Sopenharmony_ci		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */
19888c2ecf20Sopenharmony_ci
19898c2ecf20Sopenharmony_ci		if (mdtc->dirty > mdtc->bg_thresh)
19908c2ecf20Sopenharmony_ci			return true;
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci		if (wb_stat(wb, WB_RECLAIMABLE) >
19938c2ecf20Sopenharmony_ci		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
19948c2ecf20Sopenharmony_ci			return true;
19958c2ecf20Sopenharmony_ci	}
19968c2ecf20Sopenharmony_ci
19978c2ecf20Sopenharmony_ci	return false;
19988c2ecf20Sopenharmony_ci}
19998c2ecf20Sopenharmony_ci
20008c2ecf20Sopenharmony_ci/*
20018c2ecf20Sopenharmony_ci * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
20028c2ecf20Sopenharmony_ci */
20038c2ecf20Sopenharmony_ciint dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
20048c2ecf20Sopenharmony_ci		void *buffer, size_t *length, loff_t *ppos)
20058c2ecf20Sopenharmony_ci{
20068c2ecf20Sopenharmony_ci	unsigned int old_interval = dirty_writeback_interval;
20078c2ecf20Sopenharmony_ci	int ret;
20088c2ecf20Sopenharmony_ci
20098c2ecf20Sopenharmony_ci	ret = proc_dointvec(table, write, buffer, length, ppos);
20108c2ecf20Sopenharmony_ci
20118c2ecf20Sopenharmony_ci	/*
20128c2ecf20Sopenharmony_ci	 * Writing 0 to dirty_writeback_interval will disable periodic writeback
20138c2ecf20Sopenharmony_ci	 * and a different non-zero value will wakeup the writeback threads.
20148c2ecf20Sopenharmony_ci	 * wb_wakeup_delayed() would be more appropriate, but it's a pain to
20158c2ecf20Sopenharmony_ci	 * iterate over all bdis and wbs.
20168c2ecf20Sopenharmony_ci	 * The reason we do this is to make the change take effect immediately.
20178c2ecf20Sopenharmony_ci	 */
20188c2ecf20Sopenharmony_ci	if (!ret && write && dirty_writeback_interval &&
20198c2ecf20Sopenharmony_ci		dirty_writeback_interval != old_interval)
20208c2ecf20Sopenharmony_ci		wakeup_flusher_threads(WB_REASON_PERIODIC);
20218c2ecf20Sopenharmony_ci
20228c2ecf20Sopenharmony_ci	return ret;
20238c2ecf20Sopenharmony_ci}
20248c2ecf20Sopenharmony_ci
20258c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
20268c2ecf20Sopenharmony_civoid laptop_mode_timer_fn(struct timer_list *t)
20278c2ecf20Sopenharmony_ci{
20288c2ecf20Sopenharmony_ci	struct backing_dev_info *backing_dev_info =
20298c2ecf20Sopenharmony_ci		from_timer(backing_dev_info, t, laptop_mode_wb_timer);
20308c2ecf20Sopenharmony_ci
20318c2ecf20Sopenharmony_ci	wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
20328c2ecf20Sopenharmony_ci}
20338c2ecf20Sopenharmony_ci
/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	/*
	 * (Re-)arm the per-bdi laptop mode timer; the global laptop_mode
	 * value is the delay in jiffies.  mod_timer() pushes back a timer
	 * that is already pending.
	 */
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci/*
20458c2ecf20Sopenharmony_ci * We're in laptop mode and we've just synced. The sync's writes will have
20468c2ecf20Sopenharmony_ci * caused another writeback to be scheduled by laptop_io_completion.
20478c2ecf20Sopenharmony_ci * Nothing needs to be written back anymore, so we unschedule the writeback.
20488c2ecf20Sopenharmony_ci */
20498c2ecf20Sopenharmony_civoid laptop_sync_completion(void)
20508c2ecf20Sopenharmony_ci{
20518c2ecf20Sopenharmony_ci	struct backing_dev_info *bdi;
20528c2ecf20Sopenharmony_ci
20538c2ecf20Sopenharmony_ci	rcu_read_lock();
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
20568c2ecf20Sopenharmony_ci		del_timer(&bdi->laptop_mode_wb_timer);
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci	rcu_read_unlock();
20598c2ecf20Sopenharmony_ci}
20608c2ecf20Sopenharmony_ci#endif
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci/*
20638c2ecf20Sopenharmony_ci * If ratelimit_pages is too high then we can get into dirty-data overload
20648c2ecf20Sopenharmony_ci * if a large number of processes all perform writes at the same time.
20658c2ecf20Sopenharmony_ci * If it is too low then SMP machines will call the (expensive)
20668c2ecf20Sopenharmony_ci * get_writeback_state too often.
20678c2ecf20Sopenharmony_ci *
20688c2ecf20Sopenharmony_ci * Here we set ratelimit_pages to a level which ensures that when all CPUs are
20698c2ecf20Sopenharmony_ci * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
20708c2ecf20Sopenharmony_ci * thresholds.
20718c2ecf20Sopenharmony_ci */
20728c2ecf20Sopenharmony_ci
20738c2ecf20Sopenharmony_civoid writeback_set_ratelimit(void)
20748c2ecf20Sopenharmony_ci{
20758c2ecf20Sopenharmony_ci	struct wb_domain *dom = &global_wb_domain;
20768c2ecf20Sopenharmony_ci	unsigned long background_thresh;
20778c2ecf20Sopenharmony_ci	unsigned long dirty_thresh;
20788c2ecf20Sopenharmony_ci
20798c2ecf20Sopenharmony_ci	global_dirty_limits(&background_thresh, &dirty_thresh);
20808c2ecf20Sopenharmony_ci	dom->dirty_limit = dirty_thresh;
20818c2ecf20Sopenharmony_ci	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
20828c2ecf20Sopenharmony_ci	if (ratelimit_pages < 16)
20838c2ecf20Sopenharmony_ci		ratelimit_pages = 16;
20848c2ecf20Sopenharmony_ci}
20858c2ecf20Sopenharmony_ci
/*
 * CPU hotplug callback: the set of online CPUs changed, so recompute
 * ratelimit_pages (writeback_set_ratelimit() scales it by
 * num_online_cpus()).  Always succeeds.
 */
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}
20918c2ecf20Sopenharmony_ci
/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffers.
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory, and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory.
 *
 * But we might still want to scale the dirty_ratio by how
 * much memory the box has..
 */
void __init page_writeback_init(void)
{
	/* The global writeback domain is essential; failure is fatal. */
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	/*
	 * Recompute ratelimit_pages whenever the set of online CPUs
	 * changes.  page_writeback_cpu_online() is deliberately registered
	 * both as the startup callback and as the teardown ("dead")
	 * callback: the same recalculation applies in either direction.
	 */
	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
21178c2ecf20Sopenharmony_ci
21188c2ecf20Sopenharmony_ci/**
21198c2ecf20Sopenharmony_ci * tag_pages_for_writeback - tag pages to be written by write_cache_pages
21208c2ecf20Sopenharmony_ci * @mapping: address space structure to write
21218c2ecf20Sopenharmony_ci * @start: starting page index
21228c2ecf20Sopenharmony_ci * @end: ending page index (inclusive)
21238c2ecf20Sopenharmony_ci *
21248c2ecf20Sopenharmony_ci * This function scans the page range from @start to @end (inclusive) and tags
21258c2ecf20Sopenharmony_ci * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
21268c2ecf20Sopenharmony_ci * that write_cache_pages (or whoever calls this function) will then use
21278c2ecf20Sopenharmony_ci * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
21288c2ecf20Sopenharmony_ci * used to avoid livelocking of writeback by a process steadily creating new
21298c2ecf20Sopenharmony_ci * dirty pages in the file (thus it is important for this function to be quick
21308c2ecf20Sopenharmony_ci * so that it can tag pages faster than a dirtying process can create them).
21318c2ecf20Sopenharmony_ci */
21328c2ecf20Sopenharmony_civoid tag_pages_for_writeback(struct address_space *mapping,
21338c2ecf20Sopenharmony_ci			     pgoff_t start, pgoff_t end)
21348c2ecf20Sopenharmony_ci{
21358c2ecf20Sopenharmony_ci	XA_STATE(xas, &mapping->i_pages, start);
21368c2ecf20Sopenharmony_ci	unsigned int tagged = 0;
21378c2ecf20Sopenharmony_ci	void *page;
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_ci	xas_lock_irq(&xas);
21408c2ecf20Sopenharmony_ci	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
21418c2ecf20Sopenharmony_ci		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
21428c2ecf20Sopenharmony_ci		if (++tagged % XA_CHECK_SCHED)
21438c2ecf20Sopenharmony_ci			continue;
21448c2ecf20Sopenharmony_ci
21458c2ecf20Sopenharmony_ci		xas_pause(&xas);
21468c2ecf20Sopenharmony_ci		xas_unlock_irq(&xas);
21478c2ecf20Sopenharmony_ci		cond_resched();
21488c2ecf20Sopenharmony_ci		xas_lock_irq(&xas);
21498c2ecf20Sopenharmony_ci	}
21508c2ecf20Sopenharmony_ci	xas_unlock_irq(&xas);
21518c2ecf20Sopenharmony_ci}
21528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tag_pages_for_writeback);
21538c2ecf20Sopenharmony_ci
21548c2ecf20Sopenharmony_ci/**
21558c2ecf20Sopenharmony_ci * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
21568c2ecf20Sopenharmony_ci * @mapping: address space structure to write
21578c2ecf20Sopenharmony_ci * @wbc: subtract the number of written pages from *@wbc->nr_to_write
21588c2ecf20Sopenharmony_ci * @writepage: function called for each page
21598c2ecf20Sopenharmony_ci * @data: data passed to writepage function
21608c2ecf20Sopenharmony_ci *
21618c2ecf20Sopenharmony_ci * If a page is already under I/O, write_cache_pages() skips it, even
21628c2ecf20Sopenharmony_ci * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
21638c2ecf20Sopenharmony_ci * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
21648c2ecf20Sopenharmony_ci * and msync() need to guarantee that all the data which was dirty at the time
21658c2ecf20Sopenharmony_ci * the call was made get new I/O started against them.  If wbc->sync_mode is
21668c2ecf20Sopenharmony_ci * WB_SYNC_ALL then we were called for data integrity and we must wait for
21678c2ecf20Sopenharmony_ci * existing IO to complete.
21688c2ecf20Sopenharmony_ci *
21698c2ecf20Sopenharmony_ci * To avoid livelocks (when other process dirties new pages), we first tag
21708c2ecf20Sopenharmony_ci * pages which should be written back with TOWRITE tag and only then start
21718c2ecf20Sopenharmony_ci * writing them. For data-integrity sync we have to be careful so that we do
21728c2ecf20Sopenharmony_ci * not miss some pages (e.g., because some other process has cleared TOWRITE
21738c2ecf20Sopenharmony_ci * tag we set). The rule we follow is that TOWRITE tag can be cleared only
21748c2ecf20Sopenharmony_ci * by the process clearing the DIRTY tag (and submitting the page for IO).
21758c2ecf20Sopenharmony_ci *
21768c2ecf20Sopenharmony_ci * To avoid deadlocks between range_cyclic writeback and callers that hold
21778c2ecf20Sopenharmony_ci * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
21788c2ecf20Sopenharmony_ci * we do not loop back to the start of the file. Doing so causes a page
21798c2ecf20Sopenharmony_ci * lock/page writeback access order inversion - we should only ever lock
21808c2ecf20Sopenharmony_ci * multiple pages in ascending page->index order, and looping back to the start
21818c2ecf20Sopenharmony_ci * of the file violates that rule and causes deadlocks.
21828c2ecf20Sopenharmony_ci *
21838c2ecf20Sopenharmony_ci * Return: %0 on success, negative error code otherwise
21848c2ecf20Sopenharmony_ci */
21858c2ecf20Sopenharmony_ciint write_cache_pages(struct address_space *mapping,
21868c2ecf20Sopenharmony_ci		      struct writeback_control *wbc, writepage_t writepage,
21878c2ecf20Sopenharmony_ci		      void *data)
21888c2ecf20Sopenharmony_ci{
21898c2ecf20Sopenharmony_ci	int ret = 0;
21908c2ecf20Sopenharmony_ci	int done = 0;
21918c2ecf20Sopenharmony_ci	int error;
21928c2ecf20Sopenharmony_ci	struct pagevec pvec;
21938c2ecf20Sopenharmony_ci	int nr_pages;
21948c2ecf20Sopenharmony_ci	pgoff_t index;
21958c2ecf20Sopenharmony_ci	pgoff_t end;		/* Inclusive */
21968c2ecf20Sopenharmony_ci	pgoff_t done_index;
21978c2ecf20Sopenharmony_ci	int range_whole = 0;
21988c2ecf20Sopenharmony_ci	xa_mark_t tag;
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_ci	pagevec_init(&pvec);
22018c2ecf20Sopenharmony_ci	if (wbc->range_cyclic) {
22028c2ecf20Sopenharmony_ci		index = mapping->writeback_index; /* prev offset */
22038c2ecf20Sopenharmony_ci		end = -1;
22048c2ecf20Sopenharmony_ci	} else {
22058c2ecf20Sopenharmony_ci		index = wbc->range_start >> PAGE_SHIFT;
22068c2ecf20Sopenharmony_ci		end = wbc->range_end >> PAGE_SHIFT;
22078c2ecf20Sopenharmony_ci		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
22088c2ecf20Sopenharmony_ci			range_whole = 1;
22098c2ecf20Sopenharmony_ci	}
22108c2ecf20Sopenharmony_ci	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
22118c2ecf20Sopenharmony_ci		tag_pages_for_writeback(mapping, index, end);
22128c2ecf20Sopenharmony_ci		tag = PAGECACHE_TAG_TOWRITE;
22138c2ecf20Sopenharmony_ci	} else {
22148c2ecf20Sopenharmony_ci		tag = PAGECACHE_TAG_DIRTY;
22158c2ecf20Sopenharmony_ci	}
22168c2ecf20Sopenharmony_ci	done_index = index;
22178c2ecf20Sopenharmony_ci	while (!done && (index <= end)) {
22188c2ecf20Sopenharmony_ci		int i;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
22218c2ecf20Sopenharmony_ci				tag);
22228c2ecf20Sopenharmony_ci		if (nr_pages == 0)
22238c2ecf20Sopenharmony_ci			break;
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci		for (i = 0; i < nr_pages; i++) {
22268c2ecf20Sopenharmony_ci			struct page *page = pvec.pages[i];
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci			done_index = page->index;
22298c2ecf20Sopenharmony_ci
22308c2ecf20Sopenharmony_ci			lock_page(page);
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci			/*
22338c2ecf20Sopenharmony_ci			 * Page truncated or invalidated. We can freely skip it
22348c2ecf20Sopenharmony_ci			 * then, even for data integrity operations: the page
22358c2ecf20Sopenharmony_ci			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
22378c2ecf20Sopenharmony_ci			 * even if there is now a new, dirty page at the same
22388c2ecf20Sopenharmony_ci			 * pagecache address.
22398c2ecf20Sopenharmony_ci			 */
22408c2ecf20Sopenharmony_ci			if (unlikely(page->mapping != mapping)) {
22418c2ecf20Sopenharmony_cicontinue_unlock:
22428c2ecf20Sopenharmony_ci				unlock_page(page);
22438c2ecf20Sopenharmony_ci				continue;
22448c2ecf20Sopenharmony_ci			}
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_ci			if (!PageDirty(page)) {
22478c2ecf20Sopenharmony_ci				/* someone wrote it for us */
22488c2ecf20Sopenharmony_ci				goto continue_unlock;
22498c2ecf20Sopenharmony_ci			}
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ci			if (PageWriteback(page)) {
22528c2ecf20Sopenharmony_ci				if (wbc->sync_mode != WB_SYNC_NONE)
22538c2ecf20Sopenharmony_ci					wait_on_page_writeback(page);
22548c2ecf20Sopenharmony_ci				else
22558c2ecf20Sopenharmony_ci					goto continue_unlock;
22568c2ecf20Sopenharmony_ci			}
22578c2ecf20Sopenharmony_ci
22588c2ecf20Sopenharmony_ci			BUG_ON(PageWriteback(page));
22598c2ecf20Sopenharmony_ci			if (!clear_page_dirty_for_io(page))
22608c2ecf20Sopenharmony_ci				goto continue_unlock;
22618c2ecf20Sopenharmony_ci
22628c2ecf20Sopenharmony_ci			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
22638c2ecf20Sopenharmony_ci			error = (*writepage)(page, wbc, data);
22648c2ecf20Sopenharmony_ci			if (unlikely(error)) {
22658c2ecf20Sopenharmony_ci				/*
22668c2ecf20Sopenharmony_ci				 * Handle errors according to the type of
22678c2ecf20Sopenharmony_ci				 * writeback. There's no need to continue for
22688c2ecf20Sopenharmony_ci				 * background writeback. Just push done_index
22698c2ecf20Sopenharmony_ci				 * past this page so media errors won't choke
22708c2ecf20Sopenharmony_ci				 * writeout for the entire file. For integrity
22718c2ecf20Sopenharmony_ci				 * writeback, we must process the entire dirty
22728c2ecf20Sopenharmony_ci				 * set regardless of errors because the fs may
22738c2ecf20Sopenharmony_ci				 * still have state to clear for each page. In
22748c2ecf20Sopenharmony_ci				 * that case we continue processing and return
22758c2ecf20Sopenharmony_ci				 * the first error.
22768c2ecf20Sopenharmony_ci				 */
22778c2ecf20Sopenharmony_ci				if (error == AOP_WRITEPAGE_ACTIVATE) {
22788c2ecf20Sopenharmony_ci					unlock_page(page);
22798c2ecf20Sopenharmony_ci					error = 0;
22808c2ecf20Sopenharmony_ci				} else if (wbc->sync_mode != WB_SYNC_ALL) {
22818c2ecf20Sopenharmony_ci					ret = error;
22828c2ecf20Sopenharmony_ci					done_index = page->index + 1;
22838c2ecf20Sopenharmony_ci					done = 1;
22848c2ecf20Sopenharmony_ci					break;
22858c2ecf20Sopenharmony_ci				}
22868c2ecf20Sopenharmony_ci				if (!ret)
22878c2ecf20Sopenharmony_ci					ret = error;
22888c2ecf20Sopenharmony_ci			}
22898c2ecf20Sopenharmony_ci
22908c2ecf20Sopenharmony_ci			/*
22918c2ecf20Sopenharmony_ci			 * We stop writing back only if we are not doing
22928c2ecf20Sopenharmony_ci			 * integrity sync. In case of integrity sync we have to
22938c2ecf20Sopenharmony_ci			 * keep going until we have written all the pages
22948c2ecf20Sopenharmony_ci			 * we tagged for writeback prior to entering this loop.
22958c2ecf20Sopenharmony_ci			 */
22968c2ecf20Sopenharmony_ci			if (--wbc->nr_to_write <= 0 &&
22978c2ecf20Sopenharmony_ci			    wbc->sync_mode == WB_SYNC_NONE) {
22988c2ecf20Sopenharmony_ci				done = 1;
22998c2ecf20Sopenharmony_ci				break;
23008c2ecf20Sopenharmony_ci			}
23018c2ecf20Sopenharmony_ci		}
23028c2ecf20Sopenharmony_ci		pagevec_release(&pvec);
23038c2ecf20Sopenharmony_ci		cond_resched();
23048c2ecf20Sopenharmony_ci	}
23058c2ecf20Sopenharmony_ci
23068c2ecf20Sopenharmony_ci	/*
23078c2ecf20Sopenharmony_ci	 * If we hit the last page and there is more work to be done: wrap
	 * the index back to the start of the file for the next
23098c2ecf20Sopenharmony_ci	 * time we are called.
23108c2ecf20Sopenharmony_ci	 */
23118c2ecf20Sopenharmony_ci	if (wbc->range_cyclic && !done)
23128c2ecf20Sopenharmony_ci		done_index = 0;
23138c2ecf20Sopenharmony_ci	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
23148c2ecf20Sopenharmony_ci		mapping->writeback_index = done_index;
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci	return ret;
23178c2ecf20Sopenharmony_ci}
23188c2ecf20Sopenharmony_ciEXPORT_SYMBOL(write_cache_pages);
23198c2ecf20Sopenharmony_ci
23208c2ecf20Sopenharmony_ci/*
23218c2ecf20Sopenharmony_ci * Function used by generic_writepages to call the real writepage
23228c2ecf20Sopenharmony_ci * function and set the mapping flags on error
23238c2ecf20Sopenharmony_ci */
23248c2ecf20Sopenharmony_cistatic int __writepage(struct page *page, struct writeback_control *wbc,
23258c2ecf20Sopenharmony_ci		       void *data)
23268c2ecf20Sopenharmony_ci{
23278c2ecf20Sopenharmony_ci	struct address_space *mapping = data;
23288c2ecf20Sopenharmony_ci	int ret = mapping->a_ops->writepage(page, wbc);
23298c2ecf20Sopenharmony_ci	mapping_set_error(mapping, ret);
23308c2ecf20Sopenharmony_ci	return ret;
23318c2ecf20Sopenharmony_ci}
23328c2ecf20Sopenharmony_ci
23338c2ecf20Sopenharmony_ci/**
23348c2ecf20Sopenharmony_ci * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
23358c2ecf20Sopenharmony_ci * @mapping: address space structure to write
23368c2ecf20Sopenharmony_ci * @wbc: subtract the number of written pages from *@wbc->nr_to_write
23378c2ecf20Sopenharmony_ci *
23388c2ecf20Sopenharmony_ci * This is a library function, which implements the writepages()
23398c2ecf20Sopenharmony_ci * address_space_operation.
23408c2ecf20Sopenharmony_ci *
23418c2ecf20Sopenharmony_ci * Return: %0 on success, negative error code otherwise
23428c2ecf20Sopenharmony_ci */
23438c2ecf20Sopenharmony_ciint generic_writepages(struct address_space *mapping,
23448c2ecf20Sopenharmony_ci		       struct writeback_control *wbc)
23458c2ecf20Sopenharmony_ci{
23468c2ecf20Sopenharmony_ci	struct blk_plug plug;
23478c2ecf20Sopenharmony_ci	int ret;
23488c2ecf20Sopenharmony_ci
23498c2ecf20Sopenharmony_ci	/* deal with chardevs and other special file */
23508c2ecf20Sopenharmony_ci	if (!mapping->a_ops->writepage)
23518c2ecf20Sopenharmony_ci		return 0;
23528c2ecf20Sopenharmony_ci
23538c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
23548c2ecf20Sopenharmony_ci	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
23558c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
23568c2ecf20Sopenharmony_ci	return ret;
23578c2ecf20Sopenharmony_ci}
23588c2ecf20Sopenharmony_ci
23598c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_writepages);
23608c2ecf20Sopenharmony_ci
23618c2ecf20Sopenharmony_ciint do_writepages(struct address_space *mapping, struct writeback_control *wbc)
23628c2ecf20Sopenharmony_ci{
23638c2ecf20Sopenharmony_ci	int ret;
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci	if (wbc->nr_to_write <= 0)
23668c2ecf20Sopenharmony_ci		return 0;
23678c2ecf20Sopenharmony_ci	while (1) {
23688c2ecf20Sopenharmony_ci		if (mapping->a_ops->writepages)
23698c2ecf20Sopenharmony_ci			ret = mapping->a_ops->writepages(mapping, wbc);
23708c2ecf20Sopenharmony_ci		else
23718c2ecf20Sopenharmony_ci			ret = generic_writepages(mapping, wbc);
23728c2ecf20Sopenharmony_ci		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
23738c2ecf20Sopenharmony_ci			break;
23748c2ecf20Sopenharmony_ci		cond_resched();
23758c2ecf20Sopenharmony_ci		congestion_wait(BLK_RW_ASYNC, HZ/50);
23768c2ecf20Sopenharmony_ci	}
23778c2ecf20Sopenharmony_ci	return ret;
23788c2ecf20Sopenharmony_ci}
23798c2ecf20Sopenharmony_ci
23808c2ecf20Sopenharmony_ci/**
23818c2ecf20Sopenharmony_ci * write_one_page - write out a single page and wait on I/O
23828c2ecf20Sopenharmony_ci * @page: the page to write
23838c2ecf20Sopenharmony_ci *
23848c2ecf20Sopenharmony_ci * The page must be locked by the caller and will be unlocked upon return.
23858c2ecf20Sopenharmony_ci *
23868c2ecf20Sopenharmony_ci * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
23878c2ecf20Sopenharmony_ci * function returns.
23888c2ecf20Sopenharmony_ci *
23898c2ecf20Sopenharmony_ci * Return: %0 on success, negative error code otherwise
23908c2ecf20Sopenharmony_ci */
23918c2ecf20Sopenharmony_ciint write_one_page(struct page *page)
23928c2ecf20Sopenharmony_ci{
23938c2ecf20Sopenharmony_ci	struct address_space *mapping = page->mapping;
23948c2ecf20Sopenharmony_ci	int ret = 0;
23958c2ecf20Sopenharmony_ci	struct writeback_control wbc = {
23968c2ecf20Sopenharmony_ci		.sync_mode = WB_SYNC_ALL,
23978c2ecf20Sopenharmony_ci		.nr_to_write = 1,
23988c2ecf20Sopenharmony_ci	};
23998c2ecf20Sopenharmony_ci
24008c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
24018c2ecf20Sopenharmony_ci
24028c2ecf20Sopenharmony_ci	wait_on_page_writeback(page);
24038c2ecf20Sopenharmony_ci
24048c2ecf20Sopenharmony_ci	if (clear_page_dirty_for_io(page)) {
24058c2ecf20Sopenharmony_ci		get_page(page);
24068c2ecf20Sopenharmony_ci		ret = mapping->a_ops->writepage(page, &wbc);
24078c2ecf20Sopenharmony_ci		if (ret == 0)
24088c2ecf20Sopenharmony_ci			wait_on_page_writeback(page);
24098c2ecf20Sopenharmony_ci		put_page(page);
24108c2ecf20Sopenharmony_ci	} else {
24118c2ecf20Sopenharmony_ci		unlock_page(page);
24128c2ecf20Sopenharmony_ci	}
24138c2ecf20Sopenharmony_ci
24148c2ecf20Sopenharmony_ci	if (!ret)
24158c2ecf20Sopenharmony_ci		ret = filemap_check_errors(mapping);
24168c2ecf20Sopenharmony_ci	return ret;
24178c2ecf20Sopenharmony_ci}
24188c2ecf20Sopenharmony_ciEXPORT_SYMBOL(write_one_page);
24198c2ecf20Sopenharmony_ci
24208c2ecf20Sopenharmony_ci/*
24218c2ecf20Sopenharmony_ci * For address_spaces which do not use buffers nor write back.
24228c2ecf20Sopenharmony_ci */
int __set_page_dirty_no_writeback(struct page *page)
{
	/* Already dirty: nothing to do */
	if (PageDirty(page))
		return 0;
	/* Report 1 only if we performed the clean -> dirty transition */
	return !TestSetPageDirty(page);
}
24298c2ecf20Sopenharmony_ci
24308c2ecf20Sopenharmony_ci/*
24318c2ecf20Sopenharmony_ci * Helper function for set_page_dirty family.
24328c2ecf20Sopenharmony_ci *
24338c2ecf20Sopenharmony_ci * Caller must hold lock_page_memcg().
24348c2ecf20Sopenharmony_ci *
24358c2ecf20Sopenharmony_ci * NOTE: This relies on being atomic wrt interrupts.
24368c2ecf20Sopenharmony_ci */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_can_writeback(mapping)) {
		struct bdi_writeback *wb;

		/* Ensure the inode has a writeback structure to charge */
		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		/*
		 * Non-atomic __inc_* variants: per the NOTE above, this
		 * relies on the caller's context being atomic wrt interrupts.
		 */
		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		/* Feed the per-task dirty-throttling counters */
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);

		mem_cgroup_track_foreign_dirty(page, wb);
	}
}
24618c2ecf20Sopenharmony_ci
24628c2ecf20Sopenharmony_ci/*
24638c2ecf20Sopenharmony_ci * Helper function for deaccounting dirty page without writeback.
24648c2ecf20Sopenharmony_ci *
24658c2ecf20Sopenharmony_ci * Caller must hold lock_page_memcg().
24668c2ecf20Sopenharmony_ci */
24678c2ecf20Sopenharmony_civoid account_page_cleaned(struct page *page, struct address_space *mapping,
24688c2ecf20Sopenharmony_ci			  struct bdi_writeback *wb)
24698c2ecf20Sopenharmony_ci{
24708c2ecf20Sopenharmony_ci	if (mapping_can_writeback(mapping)) {
24718c2ecf20Sopenharmony_ci		dec_lruvec_page_state(page, NR_FILE_DIRTY);
24728c2ecf20Sopenharmony_ci		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
24738c2ecf20Sopenharmony_ci		dec_wb_stat(wb, WB_RECLAIMABLE);
24748c2ecf20Sopenharmony_ci		task_io_account_cancelled_write(PAGE_SIZE);
24758c2ecf20Sopenharmony_ci	}
24768c2ecf20Sopenharmony_ci}
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci/*
24798c2ecf20Sopenharmony_ci * For address_spaces which do not use buffers.  Just tag the page as dirty in
24808c2ecf20Sopenharmony_ci * the xarray.
24818c2ecf20Sopenharmony_ci *
24828c2ecf20Sopenharmony_ci * This is also used when a single buffer is being dirtied: we want to set the
24838c2ecf20Sopenharmony_ci * page dirty in that case, but not all the buffers.  This is a "bottom-up"
24848c2ecf20Sopenharmony_ci * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
24858c2ecf20Sopenharmony_ci *
24868c2ecf20Sopenharmony_ci * The caller must ensure this doesn't race with truncation.  Most will simply
24878c2ecf20Sopenharmony_ci * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
24888c2ecf20Sopenharmony_ci * the pte lock held, which also locks out truncation.
24898c2ecf20Sopenharmony_ci */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		/* No mapping: the page flag alone is all the state there is */
		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}

		xa_lock_irqsave(&mapping->i_pages, flags);
		/* Caller excludes truncation (see comment above), so this holds */
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		/* Keep the xarray dirty tag in sync with the page flag */
		__xa_set_mark(&mapping->i_pages, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		xa_unlock_irqrestore(&mapping->i_pages, flags);
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	/* Page was already dirty: no accounting or tagging needed */
	return 0;
}
25208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__set_page_dirty_nobuffers);
25218c2ecf20Sopenharmony_ci
25228c2ecf20Sopenharmony_ci/*
25238c2ecf20Sopenharmony_ci * Call this whenever redirtying a page, to de-account the dirty counters
25248c2ecf20Sopenharmony_ci * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
25258c2ecf20Sopenharmony_ci * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
25268c2ecf20Sopenharmony_ci * systematic errors in balanced_dirty_ratelimit and the dirty pages position
25278c2ecf20Sopenharmony_ci * control.
25288c2ecf20Sopenharmony_ci */
25298c2ecf20Sopenharmony_civoid account_page_redirty(struct page *page)
25308c2ecf20Sopenharmony_ci{
25318c2ecf20Sopenharmony_ci	struct address_space *mapping = page->mapping;
25328c2ecf20Sopenharmony_ci
25338c2ecf20Sopenharmony_ci	if (mapping && mapping_can_writeback(mapping)) {
25348c2ecf20Sopenharmony_ci		struct inode *inode = mapping->host;
25358c2ecf20Sopenharmony_ci		struct bdi_writeback *wb;
25368c2ecf20Sopenharmony_ci		struct wb_lock_cookie cookie = {};
25378c2ecf20Sopenharmony_ci
25388c2ecf20Sopenharmony_ci		wb = unlocked_inode_to_wb_begin(inode, &cookie);
25398c2ecf20Sopenharmony_ci		current->nr_dirtied--;
25408c2ecf20Sopenharmony_ci		dec_node_page_state(page, NR_DIRTIED);
25418c2ecf20Sopenharmony_ci		dec_wb_stat(wb, WB_DIRTIED);
25428c2ecf20Sopenharmony_ci		unlocked_inode_to_wb_end(inode, &cookie);
25438c2ecf20Sopenharmony_ci	}
25448c2ecf20Sopenharmony_ci}
25458c2ecf20Sopenharmony_ciEXPORT_SYMBOL(account_page_redirty);
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_ci/*
25488c2ecf20Sopenharmony_ci * When a writepage implementation decides that it doesn't want to write this
25498c2ecf20Sopenharmony_ci * page for some reason, it should redirty the locked page via
25508c2ecf20Sopenharmony_ci * redirty_page_for_writepage() and it should then unlock the page and return 0
25518c2ecf20Sopenharmony_ci */
25528c2ecf20Sopenharmony_ciint redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
25538c2ecf20Sopenharmony_ci{
25548c2ecf20Sopenharmony_ci	int ret;
25558c2ecf20Sopenharmony_ci
25568c2ecf20Sopenharmony_ci	wbc->pages_skipped++;
25578c2ecf20Sopenharmony_ci	ret = __set_page_dirty_nobuffers(page);
25588c2ecf20Sopenharmony_ci	account_page_redirty(page);
25598c2ecf20Sopenharmony_ci	return ret;
25608c2ecf20Sopenharmony_ci}
25618c2ecf20Sopenharmony_ciEXPORT_SYMBOL(redirty_page_for_writepage);
25628c2ecf20Sopenharmony_ci
25638c2ecf20Sopenharmony_ci/*
25648c2ecf20Sopenharmony_ci * Dirty a page.
25658c2ecf20Sopenharmony_ci *
25668c2ecf20Sopenharmony_ci * For pages with a mapping this should be done under the page lock
25678c2ecf20Sopenharmony_ci * for the benefit of asynchronous memory errors who prefer a consistent
25688c2ecf20Sopenharmony_ci * dirty state. This rule can be broken in some special cases,
25698c2ecf20Sopenharmony_ci * but should be better not to.
25708c2ecf20Sopenharmony_ci *
25718c2ecf20Sopenharmony_ci * If the mapping doesn't provide a set_page_dirty a_op, then
25728c2ecf20Sopenharmony_ci * just fall through and assume that it wants buffer_heads.
25738c2ecf20Sopenharmony_ci */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	/* Dirty state lives on the head page of a compound page */
	page = compound_head(page);
	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * readahead/lru_deactivate_page could remain
		 * PG_readahead/PG_reclaim due to race with end_page_writeback
		 * About readahead, if the page is written, the flags would be
		 * reset. So no problem.
		 * About lru_deactivate_page, if the page is redirty, the flag
		 * will be reset. So no problem. but if the page is used by readahead
		 * it will confuse readahead and make it restart the size rampup
		 * process. But it's a trivial problem.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		/* No ->set_page_dirty: fall through to the buffer_head default */
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	/* No mapping: just record dirtiness in the page flag itself */
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
26058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(set_page_dirty);
26068c2ecf20Sopenharmony_ci
26078c2ecf20Sopenharmony_ci/*
26088c2ecf20Sopenharmony_ci * set_page_dirty() is racy if the caller has no reference against
26098c2ecf20Sopenharmony_ci * page->mapping->host, and if the page is unlocked.  This is because another
26108c2ecf20Sopenharmony_ci * CPU could truncate the page off the mapping and then free the mapping.
26118c2ecf20Sopenharmony_ci *
26128c2ecf20Sopenharmony_ci * Usually, the page _is_ locked, or the caller is a user-space process which
26138c2ecf20Sopenharmony_ci * holds a reference on the inode by having an open file.
26148c2ecf20Sopenharmony_ci *
26158c2ecf20Sopenharmony_ci * In other cases, the page should be locked before running set_page_dirty().
26168c2ecf20Sopenharmony_ci */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	/* Take the page lock to close the race described above */
	lock_page(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);
	return dirtied;
}
26268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(set_page_dirty_lock);
26278c2ecf20Sopenharmony_ci
26288c2ecf20Sopenharmony_ci/*
26298c2ecf20Sopenharmony_ci * This cancels just the dirty bit on the kernel page itself, it does NOT
26308c2ecf20Sopenharmony_ci * actually remove dirty bits on any mmap's that may be around. It also
26318c2ecf20Sopenharmony_ci * leaves the page tagged dirty, so any sync activity will still find it on
26328c2ecf20Sopenharmony_ci * the dirty lists, and in particular, clear_page_dirty_for_io() will still
26338c2ecf20Sopenharmony_ci * look at the dirty bits in the VM.
26348c2ecf20Sopenharmony_ci *
26358c2ecf20Sopenharmony_ci * Doing this should *normally* only ever be done when a page is truncated,
26368c2ecf20Sopenharmony_ci * and is not actually mapped anywhere at all. However, fs/buffer.c does
26378c2ecf20Sopenharmony_ci * this when it notices that somebody has cleaned out all the buffers on a
26388c2ecf20Sopenharmony_ci * page without actually doing it through the VM. Can you say "ext3 is
26398c2ecf20Sopenharmony_ci * horribly ugly"? Thought you could.
26408c2ecf20Sopenharmony_ci */
void __cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/* Hold memcg + wb association stable across the accounting */
		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &cookie);

		/* Only de-account if the page really was dirty */
		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, &cookie);
		unlock_page_memcg(page);
	} else {
		/* No writeback accounting: clearing the flag is sufficient */
		ClearPageDirty(page);
	}
}
26628c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__cancel_dirty_page);
26638c2ecf20Sopenharmony_ci
26648c2ecf20Sopenharmony_ci/*
26658c2ecf20Sopenharmony_ci * Clear a page's dirty flag, while caring for dirty memory accounting.
26668c2ecf20Sopenharmony_ci * Returns true if the page was previously dirty.
26678c2ecf20Sopenharmony_ci *
26688c2ecf20Sopenharmony_ci * This is for preparing to put the page under writeout.  We leave the page
26698c2ecf20Sopenharmony_ci * tagged as dirty in the xarray so that a concurrent write-for-sync
26708c2ecf20Sopenharmony_ci * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
26718c2ecf20Sopenharmony_ci * implementation will run either set_page_writeback() or set_page_dirty(),
26728c2ecf20Sopenharmony_ci * at which stage we bring the page's dirty flag and xarray dirty tag
26738c2ecf20Sopenharmony_ci * back into sync.
26748c2ecf20Sopenharmony_ci *
26758c2ecf20Sopenharmony_ci * This incoherency between the page's dirty flag and xarray tag is
26768c2ecf20Sopenharmony_ci * unfortunate, but it only exists while the page is locked.
26778c2ecf20Sopenharmony_ci */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	/* See the header comment: callers are required to hold the page lock */
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (mapping && mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/*
		 * Yes, Virginia, this is indeed insane.
		 *
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the
		 * dirty bit in multiple places and clearing
		 * them concurrently from different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)"
		 * has no effect on the actual dirty bit - since
		 * that will already usually be set. But we
		 * need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit"
		 * as a serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against
		 * installing a dirty pte and marking the page dirty
		 * at this point.  We do this by having them hold the
		 * page lock while dirtying the page, and pages are
		 * always locked coming in here, so we get the desired
		 * exclusion.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		if (TestClearPageDirty(page)) {
			/* We cleared the dirty bit: undo the dirty accounting */
			dec_lruvec_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, &cookie);
		return ret;
	}
	/* No dirty accounting to maintain: the flag transition is the answer */
	return TestClearPageDirty(page);
}
27378c2ecf20Sopenharmony_ciEXPORT_SYMBOL(clear_page_dirty_for_io);
27388c2ecf20Sopenharmony_ci
/*
 * Clear PageWriteback and, where the mapping uses writeback tags, the
 * matching xarray tag; update writeback statistics accordingly.
 * Returns the previous writeback state of the page.
 */
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;
	int ret;

	/* Pin the page's memcg so the stat updates below hit the right group */
	memcg = lock_page_memcg(page);
	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			/* Page really was under writeback: drop the tag too */
			__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		/*
		 * If no pages remain tagged for writeback, tell the
		 * superblock code (counterpart of sb_mark_inode_writeback()).
		 */
		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		/* Mapping doesn't use writeback tags: only the flag matters */
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		dec_lruvec_state(lruvec, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	__unlock_page_memcg(memcg);
	return ret;
}
27828c2ecf20Sopenharmony_ci
/*
 * Set PageWriteback and, where the mapping uses writeback tags, the
 * matching xarray tag; update writeback statistics accordingly.
 * @keep_write: if true, leave PAGECACHE_TAG_TOWRITE in place so a
 * tagged-writepages walk will still visit this page.
 * Returns the previous writeback state of the page.
 */
int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret, access_ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		XA_STATE(xas, &mapping->i_pages, page_index(page));
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xas_lock_irqsave(&xas, flags);
		/* Walk to the page's slot before setting/clearing marks */
		xas_load(&xas);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			/* Sample before tagging: were any pages tagged yet? */
			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * We can come through here when swapping anonymous
			 * pages, so we don't necessarily have an inode to track
			 * for sync.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		/* Bring the xarray tags back in sync with the page flags */
		if (!PageDirty(page))
			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		xas_unlock_irqrestore(&xas, flags);
	} else {
		/* Mapping doesn't use writeback tags: only the flag matters */
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	access_ret = arch_make_page_accessible(page);
	/*
	 * If writeback has been triggered on a page that cannot be made
	 * accessible, it is too late to recover here.
	 */
	VM_BUG_ON_PAGE(access_ret != 0, page);

	return ret;

}
28398c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__test_set_page_writeback);
28408c2ecf20Sopenharmony_ci
28418c2ecf20Sopenharmony_ci/*
28428c2ecf20Sopenharmony_ci * Wait for a page to complete writeback
28438c2ecf20Sopenharmony_ci */
28448c2ecf20Sopenharmony_civoid wait_on_page_writeback(struct page *page)
28458c2ecf20Sopenharmony_ci{
28468c2ecf20Sopenharmony_ci	while (PageWriteback(page)) {
28478c2ecf20Sopenharmony_ci		trace_wait_on_page_writeback(page, page_mapping(page));
28488c2ecf20Sopenharmony_ci		wait_on_page_bit(page, PG_writeback);
28498c2ecf20Sopenharmony_ci	}
28508c2ecf20Sopenharmony_ci}
28518c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(wait_on_page_writeback);
28528c2ecf20Sopenharmony_ci
28538c2ecf20Sopenharmony_ci/**
28548c2ecf20Sopenharmony_ci * wait_for_stable_page() - wait for writeback to finish, if necessary.
28558c2ecf20Sopenharmony_ci * @page:	The page to wait on.
28568c2ecf20Sopenharmony_ci *
28578c2ecf20Sopenharmony_ci * This function determines if the given page is related to a backing device
28588c2ecf20Sopenharmony_ci * that requires page contents to be held stable during writeback.  If so, then
28598c2ecf20Sopenharmony_ci * it will wait for any pending writeback to complete.
28608c2ecf20Sopenharmony_ci */
28618c2ecf20Sopenharmony_civoid wait_for_stable_page(struct page *page)
28628c2ecf20Sopenharmony_ci{
28638c2ecf20Sopenharmony_ci	page = thp_head(page);
28648c2ecf20Sopenharmony_ci	if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
28658c2ecf20Sopenharmony_ci		wait_on_page_writeback(page);
28668c2ecf20Sopenharmony_ci}
28678c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(wait_for_stable_page);
2868