// SPDX-License-Identifier: GPL-2.0
/*
 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
 * scalable techniques.
 *
 * Copyright (C) 2017 Facebook
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/sbitmap.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kyber.h>

/*
 * Scheduling domains: the device is divided into multiple domains based on the
 * request type.
 */
enum {
	KYBER_READ,
	KYBER_WRITE,
	KYBER_DISCARD,
	KYBER_OTHER,
	KYBER_NUM_DOMAINS,
};

static const char *kyber_domain_names[] = {
	[KYBER_READ] = "READ",
	[KYBER_WRITE] = "WRITE",
	[KYBER_DISCARD] = "DISCARD",
	[KYBER_OTHER] = "OTHER",
};

enum {
	/*
	 * In order to prevent starvation of synchronous requests by a flood of
	 * asynchronous requests, we reserve 25% of requests for synchronous
	 * operations.
	 */
	KYBER_ASYNC_PERCENT = 75,
};

/*
 * Maximum device-wide depth for each scheduling domain.
 *
 * Even for fast devices with lots of tags like NVMe, you can saturate the
 * device with only a fraction of the maximum possible queue depth. So, we cap
 * these to a reasonable value.
 */
static const unsigned int kyber_depth[] = {
	[KYBER_READ] = 256,
	[KYBER_WRITE] = 128,
	[KYBER_DISCARD] = 64,
	[KYBER_OTHER] = 16,
};

/*
 * Default latency targets for each scheduling domain.
 */
static const u64 kyber_latency_targets[] = {
	[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
	[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
	[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
};

/*
 * Batch size (number of requests we'll dispatch in a row) for each scheduling
 * domain.
 */
static const unsigned int kyber_batch_size[] = {
	[KYBER_READ] = 16,
	[KYBER_WRITE] = 8,
	[KYBER_DISCARD] = 1,
	[KYBER_OTHER] = 1,
};

/*
 * Request latencies are recorded in a histogram with buckets defined relative
 * to the target latency:
 *
 * <= 1/4 * target latency
 * <= 1/2 * target latency
 * <= 3/4 * target latency
 * <= target latency
 * <= 1 1/4 * target latency
 * <= 1 1/2 * target latency
 * <= 1 3/4 * target latency
 * > 1 3/4 * target latency
 */
enum {
	/*
	 * The width of the latency histogram buckets is
	 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
	 */
	KYBER_LATENCY_SHIFT = 2,
	/*
	 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
	 * thus, "good".
	 */
	KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
	/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
	KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
};
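
/*
 * Worked example with KYBER_LATENCY_SHIFT == 2: each bucket is target / 4
 * wide, buckets 0-3 are "good" (<= target), and buckets 4-7 are "bad". For
 * the default 2 ms read target, the bucket boundaries fall at 0.5 ms, 1 ms,
 * 1.5 ms, 2 ms, 2.5 ms, 3 ms, and 3.5 ms.
 */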

/*
 * We measure both the total latency and the I/O latency (i.e., latency after
 * submitting to the device).
 */
enum {
	KYBER_TOTAL_LATENCY,
	KYBER_IO_LATENCY,
};

static const char *kyber_latency_type_names[] = {
	[KYBER_TOTAL_LATENCY] = "total",
	[KYBER_IO_LATENCY] = "I/O",
};

/*
 * Per-cpu latency histograms: total latency and I/O latency for each scheduling
 * domain except for KYBER_OTHER. (KYBER_OTHER conveniently doubles as the
 * number of latency-tracked domains, hence the array bounds below.)
 */
struct kyber_cpu_latency {
	atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
};

/*
 * There is the same mapping between ctx & hctx and between kcq & khd; we use
 * request->mq_ctx->index_hw to index the kcq in khd.
 */
struct kyber_ctx_queue {
	/*
	 * Used to ensure that operations on rq_list and kcq_map are atomic.
	 * Also protects the requests on rq_list during merges.
	 */
	spinlock_t lock;
	struct list_head rq_list[KYBER_NUM_DOMAINS];
} ____cacheline_aligned_in_smp;

struct kyber_queue_data {
	struct request_queue *q;
	dev_t dev;

	/*
	 * Each scheduling domain has a limited number of in-flight requests
	 * device-wide, enforced by these tokens.
	 */
	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];

	/*
	 * Async request percentage, converted to per-word depth for
	 * sbitmap_get_shallow().
	 */
	unsigned int async_depth;

	struct kyber_cpu_latency __percpu *cpu_latency;

	/* Timer for stats aggregation and adjusting domain tokens. */
	struct timer_list timer;

	unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];

	unsigned long latency_timeout[KYBER_OTHER];

	int domain_p99[KYBER_OTHER];

	/* Target latencies in nanoseconds. */
	u64 latency_targets[KYBER_OTHER];
};

struct kyber_hctx_data {
	spinlock_t lock;
	struct list_head rqs[KYBER_NUM_DOMAINS];
	unsigned int cur_domain;
	unsigned int batching;
	struct kyber_ctx_queue *kcqs;
	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
	struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
	atomic_t wait_index[KYBER_NUM_DOMAINS];
};

static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
			     void *key);

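/*
 * Map an operation to its scheduling domain. Anything other than a read,
 * write, or discard falls into KYBER_OTHER.
 */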
static unsigned int kyber_sched_domain(blk_opf_t opf)
{
	switch (opf & REQ_OP_MASK) {
	case REQ_OP_READ:
		return KYBER_READ;
	case REQ_OP_WRITE:
		return KYBER_WRITE;
	case REQ_OP_DISCARD:
		return KYBER_DISCARD;
	default:
		return KYBER_OTHER;
	}
}

static void flush_latency_buckets(struct kyber_queue_data *kqd,
				  struct kyber_cpu_latency *cpu_latency,
				  unsigned int sched_domain, unsigned int type)
{
	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
	atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
	unsigned int bucket;

	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
		buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
}

/*
 * Calculate the histogram bucket with the given percentile rank, or -1 if there
 * aren't enough samples yet.
 */
static int calculate_percentile(struct kyber_queue_data *kqd,
				unsigned int sched_domain, unsigned int type,
				unsigned int percentile)
{
	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
	unsigned int bucket, samples = 0, percentile_samples;

	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
		samples += buckets[bucket];

	if (!samples)
		return -1;

	/*
	 * We do the calculation once we have 500 samples or one second passes
	 * since the first sample was recorded, whichever comes first. The
	 * max() with 1UL guards against jiffies + HZ wrapping to zero, which
	 * would look like an unset timeout on the next pass.
	 */
	if (!kqd->latency_timeout[sched_domain])
		kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
	if (samples < 500 &&
	    time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
		return -1;
	}
	kqd->latency_timeout[sched_domain] = 0;

	percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
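	/*
	 * E.g., with 500 samples at the 90th percentile, percentile_samples
	 * is DIV_ROUND_UP(500 * 90, 100) == 450, and the walk below returns
	 * the first bucket at which the running total reaches 450.
	 */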
	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
		if (buckets[bucket] >= percentile_samples)
			break;
		percentile_samples -= buckets[bucket];
	}
	memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));

	trace_kyber_latency(kqd->dev, kyber_domain_names[sched_domain],
			    kyber_latency_type_names[type], percentile,
			    bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);

	return bucket;
}

static void kyber_resize_domain(struct kyber_queue_data *kqd,
				unsigned int sched_domain, unsigned int depth)
{
	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
	if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
		sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
		trace_kyber_adjust(kqd->dev, kyber_domain_names[sched_domain],
				   depth);
	}
}

static void kyber_timer_fn(struct timer_list *t)
{
	struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
	unsigned int sched_domain;
	int cpu;
	bool bad = false;

	/* Sum all of the per-cpu latency histograms. */
	for_each_online_cpu(cpu) {
		struct kyber_cpu_latency *cpu_latency;

		cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
		for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
			flush_latency_buckets(kqd, cpu_latency, sched_domain,
					      KYBER_TOTAL_LATENCY);
			flush_latency_buckets(kqd, cpu_latency, sched_domain,
					      KYBER_IO_LATENCY);
		}
	}

	/*
	 * Check if any domains have a high I/O latency, which might indicate
	 * congestion in the device. Note that we use the p90; we don't want to
	 * be too sensitive to outliers here.
	 */
	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
		int p90;

		p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
					   90);
		if (p90 >= KYBER_GOOD_BUCKETS)
			bad = true;
	}

	/*
	 * Adjust the scheduling domain depths. If we determined that there was
	 * congestion, we throttle all domains with good latencies. Either way,
	 * we ease up on throttling domains with bad latencies.
	 */
	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
		unsigned int orig_depth, depth;
		int p99;

		p99 = calculate_percentile(kqd, sched_domain,
					   KYBER_TOTAL_LATENCY, 99);
		/*
		 * This is kind of subtle: different domains will not
		 * necessarily have enough samples to calculate the latency
		 * percentiles during the same window, so we have to remember
		 * the p99 for the next time we observe congestion; once we do,
		 * we don't want to throttle again until we get more data, so we
		 * reset it to -1.
		 */
		if (bad) {
			if (p99 < 0)
				p99 = kqd->domain_p99[sched_domain];
			kqd->domain_p99[sched_domain] = -1;
		} else if (p99 >= 0) {
			kqd->domain_p99[sched_domain] = p99;
		}
		if (p99 < 0)
			continue;

		/*
		 * If this domain has bad latency, throttle less. Otherwise,
		 * throttle more iff we determined that there is congestion.
		 *
		 * The new depth is scaled linearly with the p99 latency vs the
		 * latency target. E.g., if the p99 is 3/4 of the target, then
		 * we throttle down to 3/4 of the current depth, and if the p99
		 * is 2x the target, then we double the depth.
		 */
		if (bad || p99 >= KYBER_GOOD_BUCKETS) {
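			/*
			 * p99 is a bucket index, so this scales the depth by
			 * (p99 + 1) / (1 << KYBER_LATENCY_SHIFT): e.g., a p99
			 * in bucket 2 (<= 3/4 of the target) shrinks the depth
			 * to 3/4, and a p99 in bucket 7 (> 7/4 of the target)
			 * doubles it.
			 */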
			orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
			depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
			kyber_resize_domain(kqd, sched_domain, depth);
		}
	}
}

static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
	struct kyber_queue_data *kqd;
	int ret = -ENOMEM;
	int i;

	kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
	if (!kqd)
		goto err;

	kqd->q = q;
	kqd->dev = disk_devt(q->disk);

	kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
					    GFP_KERNEL | __GFP_ZERO);
	if (!kqd->cpu_latency)
		goto err_kqd;

	timer_setup(&kqd->timer, kyber_timer_fn, 0);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		WARN_ON(!kyber_depth[i]);
		WARN_ON(!kyber_batch_size[i]);
		ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
					      kyber_depth[i], -1, false,
					      GFP_KERNEL, q->node);
		if (ret) {
			while (--i >= 0)
				sbitmap_queue_free(&kqd->domain_tokens[i]);
			goto err_buckets;
		}
	}

	for (i = 0; i < KYBER_OTHER; i++) {
		kqd->domain_p99[i] = -1;
		kqd->latency_targets[i] = kyber_latency_targets[i];
	}

	return kqd;

err_buckets:
	free_percpu(kqd->cpu_latency);
err_kqd:
	kfree(kqd);
err:
	return ERR_PTR(ret);
}

static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct kyber_queue_data *kqd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	kqd = kyber_queue_data_alloc(q);
	if (IS_ERR(kqd)) {
		kobject_put(&eq->kobj);
		return PTR_ERR(kqd);
	}

	blk_stat_enable_accounting(q);

	blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);

	eq->elevator_data = kqd;
	q->elevator = eq;

	return 0;
}

static void kyber_exit_sched(struct elevator_queue *e)
{
	struct kyber_queue_data *kqd = e->elevator_data;
	int i;

	timer_shutdown_sync(&kqd->timer);
	blk_stat_disable_accounting(kqd->q);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		sbitmap_queue_free(&kqd->domain_tokens[i]);
	free_percpu(kqd->cpu_latency);
	kfree(kqd);
}

static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
{
	unsigned int i;

	spin_lock_init(&kcq->lock);
	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		INIT_LIST_HEAD(&kcq->rq_list[i]);
}

static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct blk_mq_tags *tags = hctx->sched_tags;
	unsigned int shift = tags->bitmap_tags.sb.shift;

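	/*
	 * E.g., with an sbitmap word shift of 6 (64 bits per word), async
	 * requests can take at most 64 * 75 / 100 == 48 of the bits in each
	 * word, reserving the rest for synchronous requests.
	 */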
	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;

	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
}

static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	struct kyber_hctx_data *khd;
	int i;

	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
	if (!khd)
		return -ENOMEM;

	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
				       sizeof(struct kyber_ctx_queue),
				       GFP_KERNEL, hctx->numa_node);
	if (!khd->kcqs)
		goto err_khd;

	for (i = 0; i < hctx->nr_ctx; i++)
		kyber_ctx_queue_init(&khd->kcqs[i]);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
				      ilog2(8), GFP_KERNEL, hctx->numa_node,
				      false, false)) {
			while (--i >= 0)
				sbitmap_free(&khd->kcq_map[i]);
			goto err_kcqs;
		}
	}

	spin_lock_init(&khd->lock);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		INIT_LIST_HEAD(&khd->rqs[i]);
		khd->domain_wait[i].sbq = NULL;
		init_waitqueue_func_entry(&khd->domain_wait[i].wait,
					  kyber_domain_wake);
		khd->domain_wait[i].wait.private = hctx;
		INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
		atomic_set(&khd->wait_index[i], 0);
	}

	khd->cur_domain = 0;
	khd->batching = 0;

	hctx->sched_data = khd;
	kyber_depth_updated(hctx);

	return 0;

err_kcqs:
	kfree(khd->kcqs);
err_khd:
	kfree(khd);
	return -ENOMEM;
}

static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	int i;

	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		sbitmap_free(&khd->kcq_map[i]);
	kfree(khd->kcqs);
	kfree(hctx->sched_data);
}

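/*
 * The domain token is stashed in the request's elevator private data; -1
 * (set in kyber_prepare_request()) means no token is held.
 */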
static int rq_get_domain_token(struct request *rq)
{
	return (long)rq->elv.priv[0];
}

static void rq_set_domain_token(struct request *rq, int token)
{
	rq->elv.priv[0] = (void *)(long)token;
}

static void rq_clear_domain_token(struct kyber_queue_data *kqd,
				  struct request *rq)
{
	unsigned int sched_domain;
	int nr;

	nr = rq_get_domain_token(rq);
	if (nr != -1) {
		sched_domain = kyber_sched_domain(rq->cmd_flags);
		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				    rq->mq_ctx->cpu);
	}
}

static void kyber_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
	/*
	 * We use the scheduler tags as per-hardware queue queueing tokens.
	 * Async requests can be limited at this stage.
	 */
	if (!op_is_sync(opf)) {
		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;

		data->shallow_depth = kqd->async_depth;
	}
}

static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
	struct list_head *rq_list = &kcq->rq_list[sched_domain];
	bool merged;

	spin_lock(&kcq->lock);
	merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
	spin_unlock(&kcq->lock);

	return merged;
}

static void kyber_prepare_request(struct request *rq)
{
	rq_set_domain_token(rq, -1);
}

static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
				  struct list_head *rq_list,
				  blk_insert_t flags)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct request *rq, *next;

	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
		struct list_head *head = &kcq->rq_list[sched_domain];

		spin_lock(&kcq->lock);
		trace_block_rq_insert(rq);
		if (flags & BLK_MQ_INSERT_AT_HEAD)
			list_move(&rq->queuelist, head);
		else
			list_move_tail(&rq->queuelist, head);
		sbitmap_set_bit(&khd->kcq_map[sched_domain],
				rq->mq_ctx->index_hw[hctx->type]);
		spin_unlock(&kcq->lock);
	}
}

static void kyber_finish_request(struct request *rq)
{
	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;

	rq_clear_domain_token(kqd, rq);
}

static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
			       unsigned int sched_domain, unsigned int type,
			       u64 target, u64 latency)
{
	unsigned int bucket;
	u64 divisor;

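	/*
	 * E.g., for the default 2 ms read target, divisor is 500 us, so a
	 * 3 ms latency lands in bucket min((3 ms - 1) / 500 us, 7) == 5, one
	 * of the "bad" buckets.
	 */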
	if (latency > 0) {
		divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
		bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
			       KYBER_LATENCY_BUCKETS - 1);
	} else {
		bucket = 0;
	}

	atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
}

static void kyber_completed_request(struct request *rq, u64 now)
{
	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
	struct kyber_cpu_latency *cpu_latency;
	unsigned int sched_domain;
	u64 target;

	sched_domain = kyber_sched_domain(rq->cmd_flags);
	if (sched_domain == KYBER_OTHER)
		return;

	cpu_latency = get_cpu_ptr(kqd->cpu_latency);
	target = kqd->latency_targets[sched_domain];
	add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
			   target, now - rq->start_time_ns);
	add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
			   now - rq->io_start_time_ns);
	put_cpu_ptr(kqd->cpu_latency);

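	/* Arm the stats timer to fire within 100 ms unless it is already due sooner. */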
	timer_reduce(&kqd->timer, jiffies + HZ / 10);
}

struct flush_kcq_data {
	struct kyber_hctx_data *khd;
	unsigned int sched_domain;
	struct list_head *list;
};

static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
{
	struct flush_kcq_data *flush_data = data;
	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];

	spin_lock(&kcq->lock);
	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
			      flush_data->list);
	sbitmap_clear_bit(sb, bitnr);
	spin_unlock(&kcq->lock);

	return true;
}

static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
				  unsigned int sched_domain,
				  struct list_head *list)
{
	struct flush_kcq_data data = {
		.khd = khd,
		.sched_domain = sched_domain,
		.list = list,
	};

	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
			     flush_busy_kcq, &data);
}

static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
			     void *key)
{
	struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
	struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);

	sbitmap_del_wait_queue(wait);
	blk_mq_run_hw_queue(hctx, true);
	return 1;
}

static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				  struct kyber_hctx_data *khd,
				  struct blk_mq_hw_ctx *hctx)
{
	unsigned int sched_domain = khd->cur_domain;
	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
	struct sbq_wait *wait = &khd->domain_wait[sched_domain];
	struct sbq_wait_state *ws;
	int nr;

	nr = __sbitmap_queue_get(domain_tokens);

	/*
	 * If we failed to get a domain token, make sure the hardware queue is
	 * run when one becomes available. Note that this is serialized on
	 * khd->lock, but we still need to be careful about the waker.
	 */
	if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
		ws = sbq_wait_ptr(domain_tokens,
				  &khd->wait_index[sched_domain]);
		khd->domain_ws[sched_domain] = ws;
		sbitmap_add_wait_queue(domain_tokens, ws, wait);

		/*
		 * Try again in case a token was freed before we got on the wait
		 * queue.
		 */
		nr = __sbitmap_queue_get(domain_tokens);
	}

	/*
	 * If we got a token while we were on the wait queue, remove ourselves
	 * from the wait queue to ensure that all wake ups make forward
	 * progress. It's possible that the waker already deleted the entry
	 * between the !list_empty_careful() check and us grabbing the lock, but
	 * list_del_init() is okay with that.
	 */
	if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
		ws = khd->domain_ws[sched_domain];
		spin_lock_irq(&ws->wait.lock);
		sbitmap_del_wait_queue(wait);
		spin_unlock_irq(&ws->wait.lock);
	}

	return nr;
}

static struct request *
kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
			  struct kyber_hctx_data *khd,
			  struct blk_mq_hw_ctx *hctx)
{
	struct list_head *rqs;
	struct request *rq;
	int nr;

	rqs = &khd->rqs[khd->cur_domain];

	/*
	 * If we already have a flushed request, then we just need to get a
	 * token for it. Otherwise, if there are pending requests in the kcqs,
	 * flush the kcqs, but only if we can get a token. If not, we should
	 * leave the requests in the kcqs so that they can be merged. Note that
	 * khd->lock serializes the flushes, so if we observed any bit set in
	 * the kcq_map, we will always get a request.
	 */
	rq = list_first_entry_or_null(rqs, struct request, queuelist);
	if (rq) {
		nr = kyber_get_domain_token(kqd, khd, hctx);
		if (nr >= 0) {
			khd->batching++;
			rq_set_domain_token(rq, nr);
			list_del_init(&rq->queuelist);
			return rq;
		} else {
			trace_kyber_throttled(kqd->dev,
					      kyber_domain_names[khd->cur_domain]);
		}
	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
		nr = kyber_get_domain_token(kqd, khd, hctx);
		if (nr >= 0) {
			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
			rq = list_first_entry(rqs, struct request, queuelist);
			khd->batching++;
			rq_set_domain_token(rq, nr);
			list_del_init(&rq->queuelist);
			return rq;
		} else {
			trace_kyber_throttled(kqd->dev,
					      kyber_domain_names[khd->cur_domain]);
		}
	}

	/* There were either no pending requests or no tokens. */
	return NULL;
}

static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct request *rq;
	int i;

	spin_lock(&khd->lock);

	/*
	 * First, if we are still entitled to batch, try to dispatch a request
	 * from the batch.
	 */
	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
		if (rq)
			goto out;
	}

	/*
	 * Either,
	 * 1. We were no longer entitled to a batch.
	 * 2. The domain we were batching didn't have any requests.
	 * 3. The domain we were batching was out of tokens.
	 *
	 * Start another batch. Note that this wraps back around to the original
	 * domain if no other domains have requests or tokens.
	 */
	khd->batching = 0;
	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
			khd->cur_domain = 0;
		else
			khd->cur_domain++;

		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
		if (rq)
			goto out;
	}

	rq = NULL;
out:
	spin_unlock(&khd->lock);
	return rq;
}

static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	int i;

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (!list_empty_careful(&khd->rqs[i]) ||
		    sbitmap_any_bit_set(&khd->kcq_map[i]))
			return true;
	}

	return false;
}

#define KYBER_LAT_SHOW_STORE(domain, name)				\
static ssize_t kyber_##name##_lat_show(struct elevator_queue *e,	\
				       char *page)			\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
									\
	return sprintf(page, "%llu\n", kqd->latency_targets[domain]);	\
}									\
									\
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e,	\
					const char *page, size_t count)	\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
	unsigned long long nsec;					\
	int ret;							\
									\
	ret = kstrtoull(page, 10, &nsec);				\
	if (ret)							\
		return ret;						\
									\
	kqd->latency_targets[domain] = nsec;				\
									\
	return count;							\
}
KYBER_LAT_SHOW_STORE(KYBER_READ, read);
KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
#undef KYBER_LAT_SHOW_STORE

#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
static struct elv_fs_entry kyber_sched_attrs[] = {
	KYBER_LAT_ATTR(read),
	KYBER_LAT_ATTR(write),
	__ATTR_NULL
};
#undef KYBER_LAT_ATTR

#ifdef CONFIG_BLK_DEBUG_FS
#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name)			\
static int kyber_##name##_tokens_show(void *data, struct seq_file *m)	\
{									\
	struct request_queue *q = data;					\
	struct kyber_queue_data *kqd = q->elevator->elevator_data;	\
									\
	sbitmap_queue_show(&kqd->domain_tokens[domain], m);		\
	return 0;							\
}									\
									\
static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos)	\
	__acquires(&khd->lock)						\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	spin_lock(&khd->lock);						\
	return seq_list_start(&khd->rqs[domain], *pos);			\
}									\
									\
static void *kyber_##name##_rqs_next(struct seq_file *m, void *v,	\
				     loff_t *pos)			\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	return seq_list_next(v, &khd->rqs[domain], pos);		\
}									\
									\
static void kyber_##name##_rqs_stop(struct seq_file *m, void *v)	\
	__releases(&khd->lock)						\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	spin_unlock(&khd->lock);					\
}									\
									\
static const struct seq_operations kyber_##name##_rqs_seq_ops = {	\
	.start	= kyber_##name##_rqs_start,				\
	.next	= kyber_##name##_rqs_next,				\
	.stop	= kyber_##name##_rqs_stop,				\
	.show	= blk_mq_debugfs_rq_show,				\
};									\
									\
static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
{									\
	struct blk_mq_hw_ctx *hctx = data;				\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
	wait_queue_entry_t *wait = &khd->domain_wait[domain].wait;	\
									\
	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
	return 0;							\
}
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
#undef KYBER_DEBUGFS_DOMAIN_ATTRS

static int kyber_async_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct kyber_queue_data *kqd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", kqd->async_depth);
	return 0;
}

static int kyber_cur_domain_show(void *data, struct seq_file *m)
{
	struct blk_mq_hw_ctx *hctx = data;
	struct kyber_hctx_data *khd = hctx->sched_data;

	seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
	return 0;
}

static int kyber_batching_show(void *data, struct seq_file *m)
{
	struct blk_mq_hw_ctx *hctx = data;
	struct kyber_hctx_data *khd = hctx->sched_data;

	seq_printf(m, "%u\n", khd->batching);
	return 0;
}

#define KYBER_QUEUE_DOMAIN_ATTRS(name)	\
	{#name "_tokens", 0400, kyber_##name##_tokens_show}
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
	KYBER_QUEUE_DOMAIN_ATTRS(read),
	KYBER_QUEUE_DOMAIN_ATTRS(write),
	KYBER_QUEUE_DOMAIN_ATTRS(discard),
	KYBER_QUEUE_DOMAIN_ATTRS(other),
	{"async_depth", 0400, kyber_async_depth_show},
	{},
};
#undef KYBER_QUEUE_DOMAIN_ATTRS

#define KYBER_HCTX_DOMAIN_ATTRS(name)					\
	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops},	\
	{#name "_waiting", 0400, kyber_##name##_waiting_show}
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
	KYBER_HCTX_DOMAIN_ATTRS(read),
	KYBER_HCTX_DOMAIN_ATTRS(write),
	KYBER_HCTX_DOMAIN_ATTRS(discard),
	KYBER_HCTX_DOMAIN_ATTRS(other),
	{"cur_domain", 0400, kyber_cur_domain_show},
	{"batching", 0400, kyber_batching_show},
	{},
};
#undef KYBER_HCTX_DOMAIN_ATTRS
#endif

static struct elevator_type kyber_sched = {
	.ops = {
		.init_sched = kyber_init_sched,
		.exit_sched = kyber_exit_sched,
		.init_hctx = kyber_init_hctx,
		.exit_hctx = kyber_exit_hctx,
		.limit_depth = kyber_limit_depth,
		.bio_merge = kyber_bio_merge,
		.prepare_request = kyber_prepare_request,
		.insert_requests = kyber_insert_requests,
		.finish_request = kyber_finish_request,
		.requeue_request = kyber_finish_request,
		.completed_request = kyber_completed_request,
		.dispatch_request = kyber_dispatch_request,
		.has_work = kyber_has_work,
		.depth_updated = kyber_depth_updated,
	},
#ifdef CONFIG_BLK_DEBUG_FS
	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
#endif
	.elevator_attrs = kyber_sched_attrs,
	.elevator_name = "kyber",
	.elevator_owner = THIS_MODULE,
};

static int __init kyber_init(void)
{
	return elv_register(&kyber_sched);
}

static void __exit kyber_exit(void)
{
	elv_unregister(&kyber_sched);
}

module_init(kyber_init);
module_exit(kyber_exit);

MODULE_AUTHOR("Omar Sandoval");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Kyber I/O scheduler");