162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include "blk-rq-qos.h" 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci/* 662306a36Sopenharmony_ci * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, 762306a36Sopenharmony_ci * false if 'v' + 1 would be bigger than 'below'. 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_cistatic bool atomic_inc_below(atomic_t *v, unsigned int below) 1062306a36Sopenharmony_ci{ 1162306a36Sopenharmony_ci unsigned int cur = atomic_read(v); 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci do { 1462306a36Sopenharmony_ci if (cur >= below) 1562306a36Sopenharmony_ci return false; 1662306a36Sopenharmony_ci } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci return true; 1962306a36Sopenharmony_ci} 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cibool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) 2262306a36Sopenharmony_ci{ 2362306a36Sopenharmony_ci return atomic_inc_below(&rq_wait->inflight, limit); 2462306a36Sopenharmony_ci} 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_civoid __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci do { 2962306a36Sopenharmony_ci if (rqos->ops->cleanup) 3062306a36Sopenharmony_ci rqos->ops->cleanup(rqos, bio); 3162306a36Sopenharmony_ci rqos = rqos->next; 3262306a36Sopenharmony_ci } while (rqos); 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_civoid __rq_qos_done(struct rq_qos *rqos, struct request *rq) 3662306a36Sopenharmony_ci{ 3762306a36Sopenharmony_ci do { 3862306a36Sopenharmony_ci if (rqos->ops->done) 3962306a36Sopenharmony_ci rqos->ops->done(rqos, rq); 4062306a36Sopenharmony_ci rqos = rqos->next; 4162306a36Sopenharmony_ci } while (rqos); 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_civoid __rq_qos_issue(struct rq_qos *rqos, struct request *rq) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci do { 4762306a36Sopenharmony_ci if (rqos->ops->issue) 4862306a36Sopenharmony_ci rqos->ops->issue(rqos, rq); 4962306a36Sopenharmony_ci rqos = rqos->next; 5062306a36Sopenharmony_ci } while (rqos); 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_civoid __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci do { 5662306a36Sopenharmony_ci if (rqos->ops->requeue) 5762306a36Sopenharmony_ci rqos->ops->requeue(rqos, rq); 5862306a36Sopenharmony_ci rqos = rqos->next; 5962306a36Sopenharmony_ci } while (rqos); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_civoid __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci do { 6562306a36Sopenharmony_ci if (rqos->ops->throttle) 6662306a36Sopenharmony_ci rqos->ops->throttle(rqos, bio); 6762306a36Sopenharmony_ci rqos = rqos->next; 6862306a36Sopenharmony_ci } while (rqos); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_civoid __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci do { 7462306a36Sopenharmony_ci if (rqos->ops->track) 7562306a36Sopenharmony_ci rqos->ops->track(rqos, rq, bio); 7662306a36Sopenharmony_ci rqos = rqos->next; 7762306a36Sopenharmony_ci } while (rqos); 7862306a36Sopenharmony_ci} 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_civoid __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci do { 8362306a36Sopenharmony_ci if (rqos->ops->merge) 8462306a36Sopenharmony_ci rqos->ops->merge(rqos, rq, bio); 8562306a36Sopenharmony_ci rqos = rqos->next; 8662306a36Sopenharmony_ci } while (rqos); 8762306a36Sopenharmony_ci} 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_civoid __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) 9062306a36Sopenharmony_ci{ 9162306a36Sopenharmony_ci do { 9262306a36Sopenharmony_ci if (rqos->ops->done_bio) 9362306a36Sopenharmony_ci rqos->ops->done_bio(rqos, bio); 9462306a36Sopenharmony_ci rqos = rqos->next; 9562306a36Sopenharmony_ci } while (rqos); 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_civoid __rq_qos_queue_depth_changed(struct rq_qos *rqos) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci do { 10162306a36Sopenharmony_ci if (rqos->ops->queue_depth_changed) 10262306a36Sopenharmony_ci rqos->ops->queue_depth_changed(rqos); 10362306a36Sopenharmony_ci rqos = rqos->next; 10462306a36Sopenharmony_ci } while (rqos); 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* 10862306a36Sopenharmony_ci * Return true, if we can't increase the depth further by scaling 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_cibool rq_depth_calc_max_depth(struct rq_depth *rqd) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci unsigned int depth; 11362306a36Sopenharmony_ci bool ret = false; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci /* 11662306a36Sopenharmony_ci * For QD=1 devices, this is a special case. It's important for those 11762306a36Sopenharmony_ci * to have one request ready when one completes, so force a depth of 11862306a36Sopenharmony_ci * 2 for those devices. On the backend, it'll be a depth of 1 anyway, 11962306a36Sopenharmony_ci * since the device can't have more than that in flight. If we're 12062306a36Sopenharmony_ci * scaling down, then keep a setting of 1/1/1. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci if (rqd->queue_depth == 1) { 12362306a36Sopenharmony_ci if (rqd->scale_step > 0) 12462306a36Sopenharmony_ci rqd->max_depth = 1; 12562306a36Sopenharmony_ci else { 12662306a36Sopenharmony_ci rqd->max_depth = 2; 12762306a36Sopenharmony_ci ret = true; 12862306a36Sopenharmony_ci } 12962306a36Sopenharmony_ci } else { 13062306a36Sopenharmony_ci /* 13162306a36Sopenharmony_ci * scale_step == 0 is our default state. If we have suffered 13262306a36Sopenharmony_ci * latency spikes, step will be > 0, and we shrink the 13362306a36Sopenharmony_ci * allowed write depths. If step is < 0, we're only doing 13462306a36Sopenharmony_ci * writes, and we allow a temporarily higher depth to 13562306a36Sopenharmony_ci * increase performance. 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_ci depth = min_t(unsigned int, rqd->default_depth, 13862306a36Sopenharmony_ci rqd->queue_depth); 13962306a36Sopenharmony_ci if (rqd->scale_step > 0) 14062306a36Sopenharmony_ci depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); 14162306a36Sopenharmony_ci else if (rqd->scale_step < 0) { 14262306a36Sopenharmony_ci unsigned int maxd = 3 * rqd->queue_depth / 4; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci depth = 1 + ((depth - 1) << -rqd->scale_step); 14562306a36Sopenharmony_ci if (depth > maxd) { 14662306a36Sopenharmony_ci depth = maxd; 14762306a36Sopenharmony_ci ret = true; 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci rqd->max_depth = depth; 15262306a36Sopenharmony_ci } 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci return ret; 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci/* Returns true on success and false if scaling up wasn't possible */ 15862306a36Sopenharmony_cibool rq_depth_scale_up(struct rq_depth *rqd) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci /* 16162306a36Sopenharmony_ci * Hit max in previous round, stop here 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci if (rqd->scaled_max) 16462306a36Sopenharmony_ci return false; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci rqd->scale_step--; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci rqd->scaled_max = rq_depth_calc_max_depth(rqd); 16962306a36Sopenharmony_ci return true; 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci/* 17362306a36Sopenharmony_ci * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we 17462306a36Sopenharmony_ci * had a latency violation. Returns true on success and returns false if 17562306a36Sopenharmony_ci * scaling down wasn't possible. 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_cibool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci /* 18062306a36Sopenharmony_ci * Stop scaling down when we've hit the limit. This also prevents 18162306a36Sopenharmony_ci * ->scale_step from going to crazy values, if the device can't 18262306a36Sopenharmony_ci * keep up. 18362306a36Sopenharmony_ci */ 18462306a36Sopenharmony_ci if (rqd->max_depth == 1) 18562306a36Sopenharmony_ci return false; 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci if (rqd->scale_step < 0 && hard_throttle) 18862306a36Sopenharmony_ci rqd->scale_step = 0; 18962306a36Sopenharmony_ci else 19062306a36Sopenharmony_ci rqd->scale_step++; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci rqd->scaled_max = false; 19362306a36Sopenharmony_ci rq_depth_calc_max_depth(rqd); 19462306a36Sopenharmony_ci return true; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistruct rq_qos_wait_data { 19862306a36Sopenharmony_ci struct wait_queue_entry wq; 19962306a36Sopenharmony_ci struct task_struct *task; 20062306a36Sopenharmony_ci struct rq_wait *rqw; 20162306a36Sopenharmony_ci acquire_inflight_cb_t *cb; 20262306a36Sopenharmony_ci void *private_data; 20362306a36Sopenharmony_ci bool got_token; 20462306a36Sopenharmony_ci}; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic int rq_qos_wake_function(struct wait_queue_entry *curr, 20762306a36Sopenharmony_ci unsigned int mode, int wake_flags, void *key) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci struct rq_qos_wait_data *data = container_of(curr, 21062306a36Sopenharmony_ci struct rq_qos_wait_data, 21162306a36Sopenharmony_ci wq); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci /* 21462306a36Sopenharmony_ci * If we fail to get a budget, return -1 to interrupt the wake up loop 21562306a36Sopenharmony_ci * in __wake_up_common. 21662306a36Sopenharmony_ci */ 21762306a36Sopenharmony_ci if (!data->cb(data->rqw, data->private_data)) 21862306a36Sopenharmony_ci return -1; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci data->got_token = true; 22162306a36Sopenharmony_ci smp_wmb(); 22262306a36Sopenharmony_ci list_del_init(&curr->entry); 22362306a36Sopenharmony_ci wake_up_process(data->task); 22462306a36Sopenharmony_ci return 1; 22562306a36Sopenharmony_ci} 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci/** 22862306a36Sopenharmony_ci * rq_qos_wait - throttle on a rqw if we need to 22962306a36Sopenharmony_ci * @rqw: rqw to throttle on 23062306a36Sopenharmony_ci * @private_data: caller provided specific data 23162306a36Sopenharmony_ci * @acquire_inflight_cb: inc the rqw->inflight counter if we can 23262306a36Sopenharmony_ci * @cleanup_cb: the callback to cleanup in case we race with a waker 23362306a36Sopenharmony_ci * 23462306a36Sopenharmony_ci * This provides a uniform place for the rq_qos users to do their throttling. 23562306a36Sopenharmony_ci * Since you can end up with a lot of things sleeping at once, this manages the 23662306a36Sopenharmony_ci * waking up based on the resources available. The acquire_inflight_cb should 23762306a36Sopenharmony_ci * inc the rqw->inflight if we have the ability to do so, or return false if not 23862306a36Sopenharmony_ci * and then we will sleep until the room becomes available. 23962306a36Sopenharmony_ci * 24062306a36Sopenharmony_ci * cleanup_cb is in case that we race with a waker and need to cleanup the 24162306a36Sopenharmony_ci * inflight count accordingly. 24262306a36Sopenharmony_ci */ 24362306a36Sopenharmony_civoid rq_qos_wait(struct rq_wait *rqw, void *private_data, 24462306a36Sopenharmony_ci acquire_inflight_cb_t *acquire_inflight_cb, 24562306a36Sopenharmony_ci cleanup_cb_t *cleanup_cb) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci struct rq_qos_wait_data data = { 24862306a36Sopenharmony_ci .wq = { 24962306a36Sopenharmony_ci .func = rq_qos_wake_function, 25062306a36Sopenharmony_ci .entry = LIST_HEAD_INIT(data.wq.entry), 25162306a36Sopenharmony_ci }, 25262306a36Sopenharmony_ci .task = current, 25362306a36Sopenharmony_ci .rqw = rqw, 25462306a36Sopenharmony_ci .cb = acquire_inflight_cb, 25562306a36Sopenharmony_ci .private_data = private_data, 25662306a36Sopenharmony_ci }; 25762306a36Sopenharmony_ci bool has_sleeper; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci has_sleeper = wq_has_sleeper(&rqw->wait); 26062306a36Sopenharmony_ci if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) 26162306a36Sopenharmony_ci return; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, 26462306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 26562306a36Sopenharmony_ci do { 26662306a36Sopenharmony_ci /* The memory barrier in set_task_state saves us here. */ 26762306a36Sopenharmony_ci if (data.got_token) 26862306a36Sopenharmony_ci break; 26962306a36Sopenharmony_ci if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { 27062306a36Sopenharmony_ci finish_wait(&rqw->wait, &data.wq); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* 27362306a36Sopenharmony_ci * We raced with rq_qos_wake_function() getting a token, 27462306a36Sopenharmony_ci * which means we now have two. Put our local token 27562306a36Sopenharmony_ci * and wake anyone else potentially waiting for one. 27662306a36Sopenharmony_ci */ 27762306a36Sopenharmony_ci smp_rmb(); 27862306a36Sopenharmony_ci if (data.got_token) 27962306a36Sopenharmony_ci cleanup_cb(rqw, private_data); 28062306a36Sopenharmony_ci break; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci io_schedule(); 28362306a36Sopenharmony_ci has_sleeper = true; 28462306a36Sopenharmony_ci set_current_state(TASK_UNINTERRUPTIBLE); 28562306a36Sopenharmony_ci } while (1); 28662306a36Sopenharmony_ci finish_wait(&rqw->wait, &data.wq); 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_civoid rq_qos_exit(struct request_queue *q) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci mutex_lock(&q->rq_qos_mutex); 29262306a36Sopenharmony_ci while (q->rq_qos) { 29362306a36Sopenharmony_ci struct rq_qos *rqos = q->rq_qos; 29462306a36Sopenharmony_ci q->rq_qos = rqos->next; 29562306a36Sopenharmony_ci rqos->ops->exit(rqos); 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci mutex_unlock(&q->rq_qos_mutex); 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ciint rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, 30162306a36Sopenharmony_ci const struct rq_qos_ops *ops) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci struct request_queue *q = disk->queue; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci lockdep_assert_held(&q->rq_qos_mutex); 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci rqos->disk = disk; 30862306a36Sopenharmony_ci rqos->id = id; 30962306a36Sopenharmony_ci rqos->ops = ops; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci /* 31262306a36Sopenharmony_ci * No IO can be in-flight when adding rqos, so freeze queue, which 31362306a36Sopenharmony_ci * is fine since we only support rq_qos for blk-mq queue. 31462306a36Sopenharmony_ci */ 31562306a36Sopenharmony_ci blk_mq_freeze_queue(q); 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci if (rq_qos_id(q, rqos->id)) 31862306a36Sopenharmony_ci goto ebusy; 31962306a36Sopenharmony_ci rqos->next = q->rq_qos; 32062306a36Sopenharmony_ci q->rq_qos = rqos; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci blk_mq_unfreeze_queue(q); 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci if (rqos->ops->debugfs_attrs) { 32562306a36Sopenharmony_ci mutex_lock(&q->debugfs_mutex); 32662306a36Sopenharmony_ci blk_mq_debugfs_register_rqos(rqos); 32762306a36Sopenharmony_ci mutex_unlock(&q->debugfs_mutex); 32862306a36Sopenharmony_ci } 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci return 0; 33162306a36Sopenharmony_ciebusy: 33262306a36Sopenharmony_ci blk_mq_unfreeze_queue(q); 33362306a36Sopenharmony_ci return -EBUSY; 33462306a36Sopenharmony_ci} 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_civoid rq_qos_del(struct rq_qos *rqos) 33762306a36Sopenharmony_ci{ 33862306a36Sopenharmony_ci struct request_queue *q = rqos->disk->queue; 33962306a36Sopenharmony_ci struct rq_qos **cur; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci lockdep_assert_held(&q->rq_qos_mutex); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci blk_mq_freeze_queue(q); 34462306a36Sopenharmony_ci for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { 34562306a36Sopenharmony_ci if (*cur == rqos) { 34662306a36Sopenharmony_ci *cur = rqos->next; 34762306a36Sopenharmony_ci break; 34862306a36Sopenharmony_ci } 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci blk_mq_unfreeze_queue(q); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci mutex_lock(&q->debugfs_mutex); 35362306a36Sopenharmony_ci blk_mq_debugfs_unregister_rqos(rqos); 35462306a36Sopenharmony_ci mutex_unlock(&q->debugfs_mutex); 35562306a36Sopenharmony_ci} 356