162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * buffered writeback throttling. loosely based on CoDel. We can't drop
462306a36Sopenharmony_ci * packets for IO scheduling, so the logic is something like this:
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * - Monitor latencies in a defined window of time.
762306a36Sopenharmony_ci * - If the minimum latency in the above window exceeds some target, increment
862306a36Sopenharmony_ci *   scaling step and scale down queue depth by a factor of 2x. The monitoring
962306a36Sopenharmony_ci *   window is then shrunk to 100 / sqrt(scaling step + 1).
1062306a36Sopenharmony_ci * - For any window where we don't have solid data on what the latencies
1162306a36Sopenharmony_ci *   look like, retain status quo.
1262306a36Sopenharmony_ci * - If latencies look good, decrement scaling step.
1362306a36Sopenharmony_ci * - If we're only doing writes, allow the scaling step to go negative. This
1462306a36Sopenharmony_ci *   will temporarily boost write performance, snapping back to a stable
1562306a36Sopenharmony_ci *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
1662306a36Sopenharmony_ci *   positive scaling steps where we shrink the monitoring window, a negative
1762306a36Sopenharmony_ci *   scaling step retains the default step==0 window size.
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci * Copyright (C) 2016 Jens Axboe
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci */
2262306a36Sopenharmony_ci#include <linux/kernel.h>
2362306a36Sopenharmony_ci#include <linux/blk_types.h>
2462306a36Sopenharmony_ci#include <linux/slab.h>
2562306a36Sopenharmony_ci#include <linux/backing-dev.h>
2662306a36Sopenharmony_ci#include <linux/swap.h>
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci#include "blk-stat.h"
2962306a36Sopenharmony_ci#include "blk-wbt.h"
3062306a36Sopenharmony_ci#include "blk-rq-qos.h"
3162306a36Sopenharmony_ci#include "elevator.h"
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#define CREATE_TRACE_POINTS
3462306a36Sopenharmony_ci#include <trace/events/wbt.h>
3562306a36Sopenharmony_ci
/* Per-request wbt accounting state, stored in rq->wbt_flags. */
enum wbt_flags {
	WBT_TRACKED		= 1,	/* write, tracked for throttling */
	WBT_READ		= 2,	/* read */
	WBT_KSWAPD		= 4,	/* write, from kswapd */
	WBT_DISCARD		= 8,	/* discard */

	WBT_NR_BITS		= 4,	/* number of bits */
};
4462306a36Sopenharmony_ci
/* Indexes into rq_wb->rq_wait[]: one wait queue per class of throttled IO */
enum {
	WBT_RWQ_BG		= 0,	/* normal/background buffered writeback */
	WBT_RWQ_KSWAPD,			/* writes issued from kswapd */
	WBT_RWQ_DISCARD,		/* discards */
	WBT_NUM_RWQ,
};
5162306a36Sopenharmony_ci
/*
 * If the current state is WBT_STATE_ON/OFF_DEFAULT, it may be overridden to
 * any other state; if the current state is WBT_STATE_ON/OFF_MANUAL (set via
 * sysfs), it may only be overridden to WBT_STATE_OFF/ON_MANUAL, i.e. by
 * another explicit sysfs write.
 */
enum {
	WBT_STATE_ON_DEFAULT	= 1,	/* on by default */
	WBT_STATE_ON_MANUAL	= 2,	/* on manually by sysfs */
	WBT_STATE_OFF_DEFAULT	= 3,	/* off by default */
	WBT_STATE_OFF_MANUAL	= 4,	/* off manually by sysfs */
};
6362306a36Sopenharmony_ci
/* Per-queue writeback throttling state, embedded around a struct rq_qos. */
struct rq_wb {
	/*
	 * Settings that govern how we throttle
	 */
	unsigned int wb_background;		/* background writeback */
	unsigned int wb_normal;			/* normal writeback */

	short enable_state;			/* WBT_STATE_* */

	/*
	 * Number of consecutive periods where we don't have enough
	 * information to make a firm scale up/down decision.
	 */
	unsigned int unknown_cnt;

	u64 win_nsec;				/* default window size */
	u64 cur_win_nsec;			/* current window size */

	struct blk_stat_callback *cb;		/* stats window timer (wb_timer_fn) */

	u64 sync_issue;				/* issue time of the tracked sync IO */
	void *sync_cookie;			/* the tracked sync request itself */

	unsigned int wc;			/* device does write back caching */

	unsigned long last_issue;		/* last non-throttled issue */
	unsigned long last_comp;		/* last non-throttled comp */
	unsigned long min_lat_nsec;		/* latency target for the window */
	struct rq_qos rqos;			/* embedded rq_qos; see RQWB() */
	struct rq_wait rq_wait[WBT_NUM_RWQ];	/* per-class throttle wait queues */
	struct rq_depth rq_depth;		/* queue depth scaling state */
};
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic inline struct rq_wb *RQWB(struct rq_qos *rqos)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	return container_of(rqos, struct rq_wb, rqos);
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
/* Reset a request's wbt accounting flags (done at completion/free time). */
static inline void wbt_clear_state(struct request *rq)
{
	rq->wbt_flags = 0;
}
10662306a36Sopenharmony_ci
/* Fetch the wbt accounting flags stored in the request. */
static inline enum wbt_flags wbt_flags(struct request *rq)
{
	return rq->wbt_flags;
}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_cistatic inline bool wbt_is_tracked(struct request *rq)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	return rq->wbt_flags & WBT_TRACKED;
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic inline bool wbt_is_read(struct request *rq)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	return rq->wbt_flags & WBT_READ;
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
/* Tunables for the throttling algorithm */
enum {
	/*
	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
	 * from here depending on device stats
	 */
	RWB_DEF_DEPTH	= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats, if we don't meet this minimum
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows with not enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_cistatic inline bool rwb_enabled(struct rq_wb *rwb)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
14962306a36Sopenharmony_ci		      rwb->enable_state != WBT_STATE_OFF_MANUAL;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_cistatic void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	if (rwb_enabled(rwb)) {
15562306a36Sopenharmony_ci		const unsigned long cur = jiffies;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci		if (cur != *var)
15862306a36Sopenharmony_ci			*var = cur;
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci/*
16362306a36Sopenharmony_ci * If a task was rate throttled in balance_dirty_pages() within the last
16462306a36Sopenharmony_ci * second or so, use that to indicate a higher cleaning rate.
16562306a36Sopenharmony_ci */
16662306a36Sopenharmony_cistatic bool wb_recent_wait(struct rq_wb *rwb)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	return time_before(jiffies, bdi->last_bdp_sleep + HZ);
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_cistatic inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
17462306a36Sopenharmony_ci					  enum wbt_flags wb_acct)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	if (wb_acct & WBT_KSWAPD)
17762306a36Sopenharmony_ci		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
17862306a36Sopenharmony_ci	else if (wb_acct & WBT_DISCARD)
17962306a36Sopenharmony_ci		return &rwb->rq_wait[WBT_RWQ_DISCARD];
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	return &rwb->rq_wait[WBT_RWQ_BG];
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic void rwb_wake_all(struct rq_wb *rwb)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	int i;
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	for (i = 0; i < WBT_NUM_RWQ; i++) {
18962306a36Sopenharmony_ci		struct rq_wait *rqw = &rwb->rq_wait[i];
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci		if (wq_has_sleeper(&rqw->wait))
19262306a36Sopenharmony_ci			wake_up_all(&rqw->wait);
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci}
19562306a36Sopenharmony_ci
/*
 * Release one inflight slot on @rqw and wake throttled waiters once the
 * inflight count has dropped far enough below the applicable limit.
 */
static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * For discards, our limit is always the background. For writes, if
	 * the device does write back caching, drop further down before we
	 * wake people up.
	 */
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

	/*
	 * Don't wake anyone up if we are above the normal limit.
	 */
	if (inflight && inflight >= limit)
		return;

	if (wq_has_sleeper(&rqw->wait)) {
		int diff = limit - inflight;

		/*
		 * Only wake when fully drained, or once there is at least
		 * half the background depth of headroom below the limit.
		 */
		if (!inflight || diff >= rwb->wb_background / 2)
			wake_up_all(&rqw->wait);
	}
}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_cistatic void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
23262306a36Sopenharmony_ci	struct rq_wait *rqw;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	if (!(wb_acct & WBT_TRACKED))
23562306a36Sopenharmony_ci		return;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	rqw = get_rq_wait(rwb, wb_acct);
23862306a36Sopenharmony_ci	wbt_rqw_done(rwb, rqw, wb_acct);
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/*
24262306a36Sopenharmony_ci * Called on completion of a request. Note that it's also called when
24362306a36Sopenharmony_ci * a request is merged, when the request gets freed.
24462306a36Sopenharmony_ci */
24562306a36Sopenharmony_cistatic void wbt_done(struct rq_qos *rqos, struct request *rq)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	if (!wbt_is_tracked(rq)) {
25062306a36Sopenharmony_ci		if (rwb->sync_cookie == rq) {
25162306a36Sopenharmony_ci			rwb->sync_issue = 0;
25262306a36Sopenharmony_ci			rwb->sync_cookie = NULL;
25362306a36Sopenharmony_ci		}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci		if (wbt_is_read(rq))
25662306a36Sopenharmony_ci			wb_timestamp(rwb, &rwb->last_comp);
25762306a36Sopenharmony_ci	} else {
25862306a36Sopenharmony_ci		WARN_ON_ONCE(rq == rwb->sync_cookie);
25962306a36Sopenharmony_ci		__wbt_done(rqos, wbt_flags(rq));
26062306a36Sopenharmony_ci	}
26162306a36Sopenharmony_ci	wbt_clear_state(rq);
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic inline bool stat_sample_valid(struct blk_rq_stat *stat)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	/*
26762306a36Sopenharmony_ci	 * We need at least one read sample, and a minimum of
26862306a36Sopenharmony_ci	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
26962306a36Sopenharmony_ci	 * that it's writes impacting us, and not just some sole read on
27062306a36Sopenharmony_ci	 * a device that is in a lower power state.
27162306a36Sopenharmony_ci	 */
27262306a36Sopenharmony_ci	return (stat[READ].nr_samples >= 1 &&
27362306a36Sopenharmony_ci		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
/*
 * How long the currently tracked sync IO has been outstanding, in nsecs.
 * Returns 0 if nothing is being tracked.
 */
static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
	/*
	 * sync_issue/sync_cookie can be cleared concurrently by completion
	 * (see wbt_done()); READ_ONCE snapshots the issue time, and a
	 * cleared value simply means "nothing tracked".
	 */
	u64 now, issue = READ_ONCE(rwb->sync_issue);

	if (!issue || !rwb->sync_cookie)
		return 0;

	now = ktime_to_ns(ktime_get());
	return now - issue;
}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_cistatic inline unsigned int wbt_inflight(struct rq_wb *rwb)
28862306a36Sopenharmony_ci{
28962306a36Sopenharmony_ci	unsigned int i, ret = 0;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	for (i = 0; i < WBT_NUM_RWQ; i++)
29262306a36Sopenharmony_ci		ret += atomic_read(&rwb->rq_wait[i].inflight);
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	return ret;
29562306a36Sopenharmony_ci}
29662306a36Sopenharmony_ci
/* Verdicts from latency_exceeded() on a completed stats window */
enum {
	LAT_OK = 1,		/* read latencies were within target */
	LAT_UNKNOWN,		/* not enough samples to judge */
	LAT_UNKNOWN_WRITES,	/* no valid read/write mix, but writes ongoing */
	LAT_EXCEEDED,		/* read latencies exceeded the target */
};
30362306a36Sopenharmony_ci
/*
 * Classify the just-expired stats window: LAT_OK, LAT_EXCEEDED, or one of
 * the LAT_UNKNOWN states when there aren't enough samples to judge.
 */
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
	struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
	struct rq_depth *rqd = &rwb->rq_depth;
	u64 thislat;

	/*
	 * If our stored sync issue exceeds the window size, or it
	 * exceeds our min target AND we haven't logged any entries,
	 * flag the latency as exceeded. wbt works off completion latencies,
	 * but for a flooded device, a single sync IO can take a long time
	 * to complete after being issued. If this time exceeds our
	 * monitoring window AND we didn't see any other completions in that
	 * window, then count that sync IO as a violation of the latency.
	 */
	thislat = rwb_sync_issue_lat(rwb);
	if (thislat > rwb->cur_win_nsec ||
	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
		trace_wbt_lat(bdi, thislat);
		return LAT_EXCEEDED;
	}

	/*
	 * No read/write mix, if stat isn't valid
	 */
	if (!stat_sample_valid(stat)) {
		/*
		 * If we had writes in this stat window and the window is
		 * current, we're only doing writes. If a task recently
		 * waited or still has writes in flights, consider us doing
		 * just writes as well.
		 */
		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
		    wbt_inflight(rwb))
			return LAT_UNKNOWN_WRITES;
		return LAT_UNKNOWN;
	}

	/*
	 * If the 'min' latency exceeds our target, step down.
	 */
	if (stat[READ].min > rwb->min_lat_nsec) {
		trace_wbt_lat(bdi, stat[READ].min);
		trace_wbt_stat(bdi, stat);
		return LAT_EXCEEDED;
	}

	/* Latencies look good; trace stats while we're off-center. */
	if (rqd->scale_step)
		trace_wbt_stat(bdi, stat);

	return LAT_OK;
}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_cistatic void rwb_trace_step(struct rq_wb *rwb, const char *msg)
35862306a36Sopenharmony_ci{
35962306a36Sopenharmony_ci	struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
36062306a36Sopenharmony_ci	struct rq_depth *rqd = &rwb->rq_depth;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
36362306a36Sopenharmony_ci			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_cistatic void calc_wb_limits(struct rq_wb *rwb)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	if (rwb->min_lat_nsec == 0) {
36962306a36Sopenharmony_ci		rwb->wb_normal = rwb->wb_background = 0;
37062306a36Sopenharmony_ci	} else if (rwb->rq_depth.max_depth <= 2) {
37162306a36Sopenharmony_ci		rwb->wb_normal = rwb->rq_depth.max_depth;
37262306a36Sopenharmony_ci		rwb->wb_background = 1;
37362306a36Sopenharmony_ci	} else {
37462306a36Sopenharmony_ci		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
37562306a36Sopenharmony_ci		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
37662306a36Sopenharmony_ci	}
37762306a36Sopenharmony_ci}
37862306a36Sopenharmony_ci
/*
 * Step the queue depth scaling up one level (toward 75% of QD max, per
 * RWB_DEF_DEPTH's comment), recompute limits, and wake anyone throttled
 * under the old, smaller limits.
 */
static void scale_up(struct rq_wb *rwb)
{
	if (!rq_depth_scale_up(&rwb->rq_depth))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_wake_all(rwb);
	rwb_trace_step(rwb, tracepoint_string("scale up"));
}
38862306a36Sopenharmony_ci
/*
 * Step the queue depth scaling down one level. @hard_throttle is passed
 * through to rq_depth_scale_down(); the timer uses true when the latency
 * target was exceeded, false when drifting back toward the center step.
 */
static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_trace_step(rwb, tracepoint_string("scale down"));
}
39762306a36Sopenharmony_ci
/*
 * Compute the next monitoring window length and (re)activate the stats
 * callback for it. Positive scale steps shrink the window as
 * win_nsec / sqrt(step + 1), matching the algorithm notes at the top of
 * the file.
 */
static void rwb_arm_timer(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	if (rqd->scale_step > 0) {
		/*
		 * We should speed this up, using some variant of a fast
		 * integer inverse square root calculation. Since we only do
		 * this for every window expiration, it's not a huge deal,
		 * though.
		 *
		 * (win << 4) / int_sqrt((step + 1) << 8) is fixed-point for
		 * win / sqrt(step + 1): both numerator and denominator carry
		 * an extra factor of 16.
		 */
		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
					int_sqrt((rqd->scale_step + 1) << 8));
	} else {
		/*
		 * For step < 0, we don't want to increase/decrease the
		 * window size.
		 */
		rwb->cur_win_nsec = rwb->win_nsec;
	}

	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
}
42162306a36Sopenharmony_ci
/*
 * Stats window expiry callback: classify the window that just ended and
 * adjust the scaling step accordingly.
 */
static void wb_timer_fn(struct blk_stat_callback *cb)
{
	struct rq_wb *rwb = cb->data;
	struct rq_depth *rqd = &rwb->rq_depth;
	unsigned int inflight = wbt_inflight(rwb);
	int status;

	/* NOTE(review): disk looks to be cleared on teardown — bail if gone */
	if (!rwb->rqos.disk)
		return;

	status = latency_exceeded(rwb, cb->stat);

	trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);

	/*
	 * If we exceeded the latency target, step down. If we did not,
	 * step one level up. If we don't know enough to say either exceeded
	 * or ok, then don't do anything.
	 */
	switch (status) {
	case LAT_EXCEEDED:
		scale_down(rwb, true);
		break;
	case LAT_OK:
		scale_up(rwb);
		break;
	case LAT_UNKNOWN_WRITES:
		/*
		 * We started a the center step, but don't have a valid
		 * read/write sample, but we do have writes going on.
		 * Allow step to go negative, to increase write perf.
		 */
		scale_up(rwb);
		break;
	case LAT_UNKNOWN:
		/* Only act after RWB_UNKNOWN_BUMP consecutive unknowns. */
		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
			break;
		/*
		 * We get here when previously scaled reduced depth, and we
		 * currently don't have a valid read/write sample. For that
		 * case, slowly return to center state (step == 0).
		 */
		if (rqd->scale_step > 0)
			scale_up(rwb);
		else if (rqd->scale_step < 0)
			scale_down(rwb, false);
		break;
	default:
		break;
	}

	/*
	 * Re-arm timer, if we have IO in flight
	 */
	if (rqd->scale_step || inflight)
		rwb_arm_timer(rwb);
}
47962306a36Sopenharmony_ci
/*
 * Reset scaling back to the center step, recompute the depth limits, and
 * wake anyone who was throttled under the old limits.
 */
static void wbt_update_limits(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	rqd->scale_step = 0;
	rqd->scaled_max = false;

	rq_depth_calc_max_depth(rqd);
	calc_wb_limits(rwb);

	rwb_wake_all(rwb);
}
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_cibool wbt_disabled(struct request_queue *q)
49462306a36Sopenharmony_ci{
49562306a36Sopenharmony_ci	struct rq_qos *rqos = wbt_rq_qos(q);
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	return !rqos || !rwb_enabled(RQWB(rqos));
49862306a36Sopenharmony_ci}
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ciu64 wbt_get_min_lat(struct request_queue *q)
50162306a36Sopenharmony_ci{
50262306a36Sopenharmony_ci	struct rq_qos *rqos = wbt_rq_qos(q);
50362306a36Sopenharmony_ci	if (!rqos)
50462306a36Sopenharmony_ci		return 0;
50562306a36Sopenharmony_ci	return RQWB(rqos)->min_lat_nsec;
50662306a36Sopenharmony_ci}
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_civoid wbt_set_min_lat(struct request_queue *q, u64 val)
50962306a36Sopenharmony_ci{
51062306a36Sopenharmony_ci	struct rq_qos *rqos = wbt_rq_qos(q);
51162306a36Sopenharmony_ci	if (!rqos)
51262306a36Sopenharmony_ci		return;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	RQWB(rqos)->min_lat_nsec = val;
51562306a36Sopenharmony_ci	if (val)
51662306a36Sopenharmony_ci		RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
51762306a36Sopenharmony_ci	else
51862306a36Sopenharmony_ci		RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	wbt_update_limits(RQWB(rqos));
52162306a36Sopenharmony_ci}
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_cistatic bool close_io(struct rq_wb *rwb)
52562306a36Sopenharmony_ci{
52662306a36Sopenharmony_ci	const unsigned long now = jiffies;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	return time_before(now, rwb->last_issue + HZ / 10) ||
52962306a36Sopenharmony_ci		time_before(now, rwb->last_comp + HZ / 10);
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_cistatic inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	unsigned int limit;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
53962306a36Sopenharmony_ci		return rwb->wb_background;
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	/*
54262306a36Sopenharmony_ci	 * At this point we know it's a buffered write. If this is
54362306a36Sopenharmony_ci	 * kswapd trying to free memory, or REQ_SYNC is set, then
54462306a36Sopenharmony_ci	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
54562306a36Sopenharmony_ci	 * that. If the write is marked as a background write, then use
54662306a36Sopenharmony_ci	 * the idle limit, or go to normal if we haven't had competing
54762306a36Sopenharmony_ci	 * IO for a bit.
54862306a36Sopenharmony_ci	 */
54962306a36Sopenharmony_ci	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
55062306a36Sopenharmony_ci		limit = rwb->rq_depth.max_depth;
55162306a36Sopenharmony_ci	else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
55262306a36Sopenharmony_ci		/*
55362306a36Sopenharmony_ci		 * If less than 100ms since we completed unrelated IO,
55462306a36Sopenharmony_ci		 * limit us to half the depth for background writeback.
55562306a36Sopenharmony_ci		 */
55662306a36Sopenharmony_ci		limit = rwb->wb_background;
55762306a36Sopenharmony_ci	} else
55862306a36Sopenharmony_ci		limit = rwb->wb_normal;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	return limit;
56162306a36Sopenharmony_ci}
56262306a36Sopenharmony_ci
/* Context handed to the rq_qos_wait() callbacks below. */
struct wbt_wait_data {
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;	/* accounting class of the waiting IO */
	blk_opf_t opf;		/* op flags, used by get_limit() */
};
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_cistatic bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
57062306a36Sopenharmony_ci{
57162306a36Sopenharmony_ci	struct wbt_wait_data *data = private_data;
57262306a36Sopenharmony_ci	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf));
57362306a36Sopenharmony_ci}
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_cistatic void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
57662306a36Sopenharmony_ci{
57762306a36Sopenharmony_ci	struct wbt_wait_data *data = private_data;
57862306a36Sopenharmony_ci	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
57962306a36Sopenharmony_ci}
58062306a36Sopenharmony_ci
/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       blk_opf_t opf)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.opf = opf,
	};

	/* Sleeps until wbt_inflight_cb() succeeds in taking a slot. */
	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_cistatic inline bool wbt_should_throttle(struct bio *bio)
59962306a36Sopenharmony_ci{
60062306a36Sopenharmony_ci	switch (bio_op(bio)) {
60162306a36Sopenharmony_ci	case REQ_OP_WRITE:
60262306a36Sopenharmony_ci		/*
60362306a36Sopenharmony_ci		 * Don't throttle WRITE_ODIRECT
60462306a36Sopenharmony_ci		 */
60562306a36Sopenharmony_ci		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
60662306a36Sopenharmony_ci		    (REQ_SYNC | REQ_IDLE))
60762306a36Sopenharmony_ci			return false;
60862306a36Sopenharmony_ci		fallthrough;
60962306a36Sopenharmony_ci	case REQ_OP_DISCARD:
61062306a36Sopenharmony_ci		return true;
61162306a36Sopenharmony_ci	default:
61262306a36Sopenharmony_ci		return false;
61362306a36Sopenharmony_ci	}
61462306a36Sopenharmony_ci}
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_cistatic enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
61762306a36Sopenharmony_ci{
61862306a36Sopenharmony_ci	enum wbt_flags flags = 0;
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_ci	if (!rwb_enabled(rwb))
62162306a36Sopenharmony_ci		return 0;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	if (bio_op(bio) == REQ_OP_READ) {
62462306a36Sopenharmony_ci		flags = WBT_READ;
62562306a36Sopenharmony_ci	} else if (wbt_should_throttle(bio)) {
62662306a36Sopenharmony_ci		if (current_is_kswapd())
62762306a36Sopenharmony_ci			flags |= WBT_KSWAPD;
62862306a36Sopenharmony_ci		if (bio_op(bio) == REQ_OP_DISCARD)
62962306a36Sopenharmony_ci			flags |= WBT_DISCARD;
63062306a36Sopenharmony_ci		flags |= WBT_TRACKED;
63162306a36Sopenharmony_ci	}
63262306a36Sopenharmony_ci	return flags;
63362306a36Sopenharmony_ci}
63462306a36Sopenharmony_ci
/* Undo the throttling charge taken in wbt_wait() for a bio that aborted. */
static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	__wbt_done(rqos, bio_to_wbt_flags(RQWB(rqos), bio));
}
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci/*
64362306a36Sopenharmony_ci * May sleep, if we have exceeded the writeback limits. Caller can pass
64462306a36Sopenharmony_ci * in an irq held spinlock, if it holds one when calling this function.
64562306a36Sopenharmony_ci * If we do sleep, we'll release and re-grab it.
64662306a36Sopenharmony_ci */
64762306a36Sopenharmony_cistatic void wbt_wait(struct rq_qos *rqos, struct bio *bio)
64862306a36Sopenharmony_ci{
64962306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
65062306a36Sopenharmony_ci	enum wbt_flags flags;
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	flags = bio_to_wbt_flags(rwb, bio);
65362306a36Sopenharmony_ci	if (!(flags & WBT_TRACKED)) {
65462306a36Sopenharmony_ci		if (flags & WBT_READ)
65562306a36Sopenharmony_ci			wb_timestamp(rwb, &rwb->last_issue);
65662306a36Sopenharmony_ci		return;
65762306a36Sopenharmony_ci	}
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	__wbt_wait(rwb, flags, bio->bi_opf);
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	if (!blk_stat_is_active(rwb->cb))
66262306a36Sopenharmony_ci		rwb_arm_timer(rwb);
66362306a36Sopenharmony_ci}
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_cistatic void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
66662306a36Sopenharmony_ci{
66762306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
66862306a36Sopenharmony_ci	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
66962306a36Sopenharmony_ci}
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_cistatic void wbt_issue(struct rq_qos *rqos, struct request *rq)
67262306a36Sopenharmony_ci{
67362306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci	if (!rwb_enabled(rwb))
67662306a36Sopenharmony_ci		return;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	/*
67962306a36Sopenharmony_ci	 * Track sync issue, in case it takes a long time to complete. Allows us
68062306a36Sopenharmony_ci	 * to react quicker, if a sync IO takes a long time to complete. Note
68162306a36Sopenharmony_ci	 * that this is just a hint. The request can go away when it completes,
68262306a36Sopenharmony_ci	 * so it's important we never dereference it. We only use the address to
68362306a36Sopenharmony_ci	 * compare with, which is why we store the sync_issue time locally.
68462306a36Sopenharmony_ci	 */
68562306a36Sopenharmony_ci	if (wbt_is_read(rq) && !rwb->sync_issue) {
68662306a36Sopenharmony_ci		rwb->sync_cookie = rq;
68762306a36Sopenharmony_ci		rwb->sync_issue = rq->io_start_time_ns;
68862306a36Sopenharmony_ci	}
68962306a36Sopenharmony_ci}
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_cistatic void wbt_requeue(struct rq_qos *rqos, struct request *rq)
69262306a36Sopenharmony_ci{
69362306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
69462306a36Sopenharmony_ci	if (!rwb_enabled(rwb))
69562306a36Sopenharmony_ci		return;
69662306a36Sopenharmony_ci	if (rq == rwb->sync_cookie) {
69762306a36Sopenharmony_ci		rwb->sync_issue = 0;
69862306a36Sopenharmony_ci		rwb->sync_cookie = NULL;
69962306a36Sopenharmony_ci	}
70062306a36Sopenharmony_ci}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_civoid wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
70362306a36Sopenharmony_ci{
70462306a36Sopenharmony_ci	struct rq_qos *rqos = wbt_rq_qos(q);
70562306a36Sopenharmony_ci	if (rqos)
70662306a36Sopenharmony_ci		RQWB(rqos)->wc = write_cache_on;
70762306a36Sopenharmony_ci}
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci/*
71062306a36Sopenharmony_ci * Enable wbt if defaults are configured that way
71162306a36Sopenharmony_ci */
71262306a36Sopenharmony_civoid wbt_enable_default(struct gendisk *disk)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	struct request_queue *q = disk->queue;
71562306a36Sopenharmony_ci	struct rq_qos *rqos;
71662306a36Sopenharmony_ci	bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	if (q->elevator &&
71962306a36Sopenharmony_ci	    test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
72062306a36Sopenharmony_ci		enable = false;
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	/* Throttling already enabled? */
72362306a36Sopenharmony_ci	rqos = wbt_rq_qos(q);
72462306a36Sopenharmony_ci	if (rqos) {
72562306a36Sopenharmony_ci		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
72662306a36Sopenharmony_ci			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
72762306a36Sopenharmony_ci		return;
72862306a36Sopenharmony_ci	}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	/* Queue not registered? Maybe shutting down... */
73162306a36Sopenharmony_ci	if (!blk_queue_registered(q))
73262306a36Sopenharmony_ci		return;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	if (queue_is_mq(q) && enable)
73562306a36Sopenharmony_ci		wbt_init(disk);
73662306a36Sopenharmony_ci}
73762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(wbt_enable_default);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ciu64 wbt_default_latency_nsec(struct request_queue *q)
74062306a36Sopenharmony_ci{
74162306a36Sopenharmony_ci	/*
74262306a36Sopenharmony_ci	 * We default to 2msec for non-rotational storage, and 75msec
74362306a36Sopenharmony_ci	 * for rotational storage.
74462306a36Sopenharmony_ci	 */
74562306a36Sopenharmony_ci	if (blk_queue_nonrot(q))
74662306a36Sopenharmony_ci		return 2000000ULL;
74762306a36Sopenharmony_ci	else
74862306a36Sopenharmony_ci		return 75000000ULL;
74962306a36Sopenharmony_ci}
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_cistatic int wbt_data_dir(const struct request *rq)
75262306a36Sopenharmony_ci{
75362306a36Sopenharmony_ci	const enum req_op op = req_op(rq);
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	if (op == REQ_OP_READ)
75662306a36Sopenharmony_ci		return READ;
75762306a36Sopenharmony_ci	else if (op_is_write(op))
75862306a36Sopenharmony_ci		return WRITE;
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	/* don't account */
76162306a36Sopenharmony_ci	return -1;
76262306a36Sopenharmony_ci}
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_cistatic void wbt_queue_depth_changed(struct rq_qos *rqos)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
76762306a36Sopenharmony_ci	wbt_update_limits(RQWB(rqos));
76862306a36Sopenharmony_ci}
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_cistatic void wbt_exit(struct rq_qos *rqos)
77162306a36Sopenharmony_ci{
77262306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci	blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
77562306a36Sopenharmony_ci	blk_stat_free_callback(rwb->cb);
77662306a36Sopenharmony_ci	kfree(rwb);
77762306a36Sopenharmony_ci}
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci/*
78062306a36Sopenharmony_ci * Disable wbt, if enabled by default.
78162306a36Sopenharmony_ci */
78262306a36Sopenharmony_civoid wbt_disable_default(struct gendisk *disk)
78362306a36Sopenharmony_ci{
78462306a36Sopenharmony_ci	struct rq_qos *rqos = wbt_rq_qos(disk->queue);
78562306a36Sopenharmony_ci	struct rq_wb *rwb;
78662306a36Sopenharmony_ci	if (!rqos)
78762306a36Sopenharmony_ci		return;
78862306a36Sopenharmony_ci	rwb = RQWB(rqos);
78962306a36Sopenharmony_ci	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
79062306a36Sopenharmony_ci		blk_stat_deactivate(rwb->cb);
79162306a36Sopenharmony_ci		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
79262306a36Sopenharmony_ci	}
79362306a36Sopenharmony_ci}
79462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(wbt_disable_default);
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci#ifdef CONFIG_BLK_DEBUG_FS
79762306a36Sopenharmony_cistatic int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
79862306a36Sopenharmony_ci{
79962306a36Sopenharmony_ci	struct rq_qos *rqos = data;
80062306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
80362306a36Sopenharmony_ci	return 0;
80462306a36Sopenharmony_ci}
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_cistatic int wbt_enabled_show(void *data, struct seq_file *m)
80762306a36Sopenharmony_ci{
80862306a36Sopenharmony_ci	struct rq_qos *rqos = data;
80962306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	seq_printf(m, "%d\n", rwb->enable_state);
81262306a36Sopenharmony_ci	return 0;
81362306a36Sopenharmony_ci}
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_cistatic int wbt_id_show(void *data, struct seq_file *m)
81662306a36Sopenharmony_ci{
81762306a36Sopenharmony_ci	struct rq_qos *rqos = data;
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	seq_printf(m, "%u\n", rqos->id);
82062306a36Sopenharmony_ci	return 0;
82162306a36Sopenharmony_ci}
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_cistatic int wbt_inflight_show(void *data, struct seq_file *m)
82462306a36Sopenharmony_ci{
82562306a36Sopenharmony_ci	struct rq_qos *rqos = data;
82662306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
82762306a36Sopenharmony_ci	int i;
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	for (i = 0; i < WBT_NUM_RWQ; i++)
83062306a36Sopenharmony_ci		seq_printf(m, "%d: inflight %d\n", i,
83162306a36Sopenharmony_ci			   atomic_read(&rwb->rq_wait[i].inflight));
83262306a36Sopenharmony_ci	return 0;
83362306a36Sopenharmony_ci}
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_cistatic int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
83662306a36Sopenharmony_ci{
83762306a36Sopenharmony_ci	struct rq_qos *rqos = data;
83862306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci	seq_printf(m, "%lu\n", rwb->min_lat_nsec);
84162306a36Sopenharmony_ci	return 0;
84262306a36Sopenharmony_ci}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_cistatic int wbt_unknown_cnt_show(void *data, struct seq_file *m)
84562306a36Sopenharmony_ci{
84662306a36Sopenharmony_ci	struct rq_qos *rqos = data;
84762306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	seq_printf(m, "%u\n", rwb->unknown_cnt);
85062306a36Sopenharmony_ci	return 0;
85162306a36Sopenharmony_ci}
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_cistatic int wbt_normal_show(void *data, struct seq_file *m)
85462306a36Sopenharmony_ci{
85562306a36Sopenharmony_ci	struct rq_qos *rqos = data;
85662306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	seq_printf(m, "%u\n", rwb->wb_normal);
85962306a36Sopenharmony_ci	return 0;
86062306a36Sopenharmony_ci}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_cistatic int wbt_background_show(void *data, struct seq_file *m)
86362306a36Sopenharmony_ci{
86462306a36Sopenharmony_ci	struct rq_qos *rqos = data;
86562306a36Sopenharmony_ci	struct rq_wb *rwb = RQWB(rqos);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	seq_printf(m, "%u\n", rwb->wb_background);
86862306a36Sopenharmony_ci	return 0;
86962306a36Sopenharmony_ci}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_cistatic const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
87262306a36Sopenharmony_ci	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
87362306a36Sopenharmony_ci	{"enabled", 0400, wbt_enabled_show},
87462306a36Sopenharmony_ci	{"id", 0400, wbt_id_show},
87562306a36Sopenharmony_ci	{"inflight", 0400, wbt_inflight_show},
87662306a36Sopenharmony_ci	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
87762306a36Sopenharmony_ci	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
87862306a36Sopenharmony_ci	{"wb_normal", 0400, wbt_normal_show},
87962306a36Sopenharmony_ci	{"wb_background", 0400, wbt_background_show},
88062306a36Sopenharmony_ci	{},
88162306a36Sopenharmony_ci};
88262306a36Sopenharmony_ci#endif
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_cistatic const struct rq_qos_ops wbt_rqos_ops = {
88562306a36Sopenharmony_ci	.throttle = wbt_wait,
88662306a36Sopenharmony_ci	.issue = wbt_issue,
88762306a36Sopenharmony_ci	.track = wbt_track,
88862306a36Sopenharmony_ci	.requeue = wbt_requeue,
88962306a36Sopenharmony_ci	.done = wbt_done,
89062306a36Sopenharmony_ci	.cleanup = wbt_cleanup,
89162306a36Sopenharmony_ci	.queue_depth_changed = wbt_queue_depth_changed,
89262306a36Sopenharmony_ci	.exit = wbt_exit,
89362306a36Sopenharmony_ci#ifdef CONFIG_BLK_DEBUG_FS
89462306a36Sopenharmony_ci	.debugfs_attrs = wbt_debugfs_attrs,
89562306a36Sopenharmony_ci#endif
89662306a36Sopenharmony_ci};
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ciint wbt_init(struct gendisk *disk)
89962306a36Sopenharmony_ci{
90062306a36Sopenharmony_ci	struct request_queue *q = disk->queue;
90162306a36Sopenharmony_ci	struct rq_wb *rwb;
90262306a36Sopenharmony_ci	int i;
90362306a36Sopenharmony_ci	int ret;
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
90662306a36Sopenharmony_ci	if (!rwb)
90762306a36Sopenharmony_ci		return -ENOMEM;
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
91062306a36Sopenharmony_ci	if (!rwb->cb) {
91162306a36Sopenharmony_ci		kfree(rwb);
91262306a36Sopenharmony_ci		return -ENOMEM;
91362306a36Sopenharmony_ci	}
91462306a36Sopenharmony_ci
91562306a36Sopenharmony_ci	for (i = 0; i < WBT_NUM_RWQ; i++)
91662306a36Sopenharmony_ci		rq_wait_init(&rwb->rq_wait[i]);
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_ci	rwb->last_comp = rwb->last_issue = jiffies;
91962306a36Sopenharmony_ci	rwb->win_nsec = RWB_WINDOW_NSEC;
92062306a36Sopenharmony_ci	rwb->enable_state = WBT_STATE_ON_DEFAULT;
92162306a36Sopenharmony_ci	rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags);
92262306a36Sopenharmony_ci	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
92362306a36Sopenharmony_ci	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
92462306a36Sopenharmony_ci	rwb->rq_depth.queue_depth = blk_queue_depth(q);
92562306a36Sopenharmony_ci	wbt_update_limits(rwb);
92662306a36Sopenharmony_ci
92762306a36Sopenharmony_ci	/*
92862306a36Sopenharmony_ci	 * Assign rwb and add the stats callback.
92962306a36Sopenharmony_ci	 */
93062306a36Sopenharmony_ci	mutex_lock(&q->rq_qos_mutex);
93162306a36Sopenharmony_ci	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
93262306a36Sopenharmony_ci	mutex_unlock(&q->rq_qos_mutex);
93362306a36Sopenharmony_ci	if (ret)
93462306a36Sopenharmony_ci		goto err_free;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	blk_stat_add_callback(q, rwb->cb);
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	return 0;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_cierr_free:
94162306a36Sopenharmony_ci	blk_stat_free_callback(rwb->cb);
94262306a36Sopenharmony_ci	kfree(rwb);
94362306a36Sopenharmony_ci	return ret;
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci}
946