// SPDX-License-Identifier: GPL-2.0
/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"

/*
 * Mark a hardware queue as needing a restart.
 */
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);

void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	/*
	 * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
	 * in blk_mq_run_hw_queue(). Its pair is the barrier in
	 * blk_mq_dispatch_rq_list(): without it, the dispatch code might not
	 * see SCHED_RESTART while a new request added to hctx->dispatch is
	 * missed by the check in blk_mq_run_hw_queue().
	 */
	smp_mb();

	blk_mq_run_hw_queue(hctx, true);
}

static int sched_rq_cmp(void *priv, const struct list_head *a,
			const struct list_head *b)
{
	struct request *rqa = container_of(a, struct request, queuelist);
	struct request *rqb = container_of(b, struct request, queuelist);

	return rqa->mq_hctx > rqb->mq_hctx;
}

static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
{
	struct blk_mq_hw_ctx *hctx =
		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
	struct request *rq;
	LIST_HEAD(hctx_list);
	unsigned int count = 0;

	list_for_each_entry(rq, rq_list, queuelist) {
		if (rq->mq_hctx != hctx) {
			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
			goto dispatch;
		}
		count++;
	}
	list_splice_tail_init(rq_list, &hctx_list);

dispatch:
	return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}

#define BLK_MQ_BUDGET_DELAY	3	/* ms units */

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() fails to get the budget.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again. This is necessary to avoid starving flushes.
 */
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	bool multi_hctxs = false, run_queue = false;
	bool dispatched = false, busy = false;
	unsigned int max_dispatch;
	LIST_HEAD(rq_list);
	int count = 0;

	if (hctx->dispatch_busy)
		max_dispatch = 1;
	else
		max_dispatch = hctx->queue->nr_requests;

	do {
		struct request *rq;
		int budget_token;

		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
			break;

		if (!list_empty_careful(&hctx->dispatch)) {
			busy = true;
			break;
		}

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = e->type->ops.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue. Kick it
			 * ourselves.
			 */
			run_queue = true;
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add_tail(&rq->queuelist, &rq_list);
		count++;
		if (rq->mq_hctx != hctx)
			multi_hctxs = true;

		/*
		 * If we cannot get tag for the request, stop dequeueing
		 * requests from the IO scheduler. We are unlikely to be able
		 * to submit them anyway and it creates false impression for
		 * scheduling heuristics that the device can take more IO.
		 */
		if (!blk_mq_get_driver_tag(rq))
			break;
	} while (count < max_dispatch);

	if (!count) {
		if (run_queue)
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
	} else if (multi_hctxs) {
		/*
		 * Requests from different hctx may be dequeued from some
		 * schedulers, such as bfq and deadline.
		 *
		 * Sort the requests in the list according to their hctx,
		 * dispatch batching requests from same hctx at a time.
		 */
		list_sort(NULL, &rq_list, sched_rq_cmp);
		do {
			dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
		} while (!list_empty(&rq_list));
	} else {
		dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
	}

	if (busy)
		return -EAGAIN;
	return !!dispatched;
}

static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	unsigned long end = jiffies + HZ;
	int ret;

	do {
		ret = __blk_mq_do_dispatch_sched(hctx);
		if (ret != 1)
			break;
		if (need_resched() || time_is_before_jiffies(end)) {
			blk_mq_delay_run_hw_queue(hctx, 0);
			break;
		}
	} while (1);

	return ret;
}

static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
					  struct blk_mq_ctx *ctx)
{
	unsigned short idx = ctx->index_hw[hctx->type];

	if (++idx == hctx->nr_ctx)
		idx = 0;

	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() fails to get the budget.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again. This is necessary to avoid starving flushes.
 */
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
	int ret = 0;
	struct request *rq;

	do {
		int budget_token;

		if (!list_empty_careful(&hctx->dispatch)) {
			ret = -EAGAIN;
			break;
		}

		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue. Kick it
			 * ourselves.
			 */
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));

	WRITE_ONCE(hctx->dispatch_from, ctx);
	return ret;
}

static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	bool need_dispatch = false;
	LIST_HEAD(rq_list);

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests, if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 *
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list or we were able to dispatch from the
	 * dispatch list.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		if (!blk_mq_dispatch_rq_list(hctx, &rq_list, 0))
			return 0;
		need_dispatch = true;
	} else {
		need_dispatch = hctx->dispatch_busy;
	}

	if (hctx->queue->elevator)
		return blk_mq_do_dispatch_sched(hctx);

	/* dequeue request one by one from sw queue if queue is busy */
	if (need_dispatch)
		return blk_mq_do_dispatch_ctx(hctx);
	blk_mq_flush_busy_ctxs(hctx, &rq_list);
	blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
	return 0;
}

void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * A return of -EAGAIN is an indication that hctx->dispatch is not
	 * empty and we must run again in order to avoid starving flushes.
	 */
	if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
		if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
			blk_mq_run_hw_queue(hctx, true);
	}
}

bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
	bool ret = false;
	enum hctx_type type;

	if (e && e->type->ops.bio_merge) {
		ret = e->type->ops.bio_merge(q, bio, nr_segs);
		goto out_put;
	}

	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
	type = hctx->type;
	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
	    list_empty_careful(&ctx->rq_lists[type]))
		goto out_put;

	/* default per sw-queue merge */
	spin_lock(&ctx->lock);
	/*
	 * Reverse check our software queue for entries that we could
	 * potentially merge with. Currently includes a hand-wavy stop
	 * count of 8, to not spend too much time checking for merges.
	 */
	if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
		ret = true;

	spin_unlock(&ctx->lock);
out_put:
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
				   struct list_head *free)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
					  struct blk_mq_hw_ctx *hctx,
					  unsigned int hctx_idx)
{
	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
		hctx->sched_tags = q->sched_shared_tags;
		return 0;
	}

	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
						    q->nr_requests);

	if (!hctx->sched_tags)
		return -ENOMEM;
	return 0;
}

static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
{
	blk_mq_free_rq_map(queue->sched_shared_tags);
	queue->sched_shared_tags = NULL;
}

/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->sched_tags) {
			if (!blk_mq_is_shared_tags(flags))
				blk_mq_free_rq_map(hctx->sched_tags);
			hctx->sched_tags = NULL;
		}
	}

	if (blk_mq_is_shared_tags(flags))
		blk_mq_exit_sched_shared_tags(q);
}

static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
{
	struct blk_mq_tag_set *set = queue->tag_set;

	/*
	 * Set initial depth at max so that we don't need to reallocate for
	 * updating nr_requests.
	 */
	queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
						BLK_MQ_NO_HCTX_IDX,
						MAX_SCHED_RQ);
	if (!queue->sched_shared_tags)
		return -ENOMEM;

	blk_mq_tag_update_sched_shared_tags(queue);

	return 0;
}

/* caller must have a reference to @e, will grab another one if successful */
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	unsigned int flags = q->tag_set->flags;
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned long i;
	int ret;

	/*
	 * Default to double of smaller one between hw queue_depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_DEFAULT_RQ);

	if (blk_mq_is_shared_tags(flags)) {
		ret = blk_mq_init_sched_shared_tags(q);
		if (ret)
			return ret;
	}

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
		if (ret)
			goto err_free_map_and_rqs;
	}

	ret = e->ops.init_sched(q, e);
	if (ret)
		goto err_free_map_and_rqs;

	mutex_lock(&q->debugfs_mutex);
	blk_mq_debugfs_register_sched(q);
	mutex_unlock(&q->debugfs_mutex);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.init_hctx) {
			ret = e->ops.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_sched_free_rqs(q);
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		mutex_lock(&q->debugfs_mutex);
		blk_mq_debugfs_register_sched_hctx(q, hctx);
		mutex_unlock(&q->debugfs_mutex);
	}

	return 0;

err_free_map_and_rqs:
	blk_mq_sched_free_rqs(q);
	blk_mq_sched_tags_teardown(q, flags);

	q->elevator = NULL;
	return ret;
}

/*
 * called in either blk_queue_cleanup or elevator_switch, tagset
 * is required for freeing requests
 */
void blk_mq_sched_free_rqs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;

	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
		blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
				BLK_MQ_NO_HCTX_IDX);
	} else {
		queue_for_each_hw_ctx(q, hctx, i) {
			if (hctx->sched_tags)
				blk_mq_free_rqs(q->tag_set,
						hctx->sched_tags, i);
		}
	}
}

void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;
	unsigned int flags = 0;

	queue_for_each_hw_ctx(q, hctx, i) {
		mutex_lock(&q->debugfs_mutex);
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		mutex_unlock(&q->debugfs_mutex);

		if (e->type->ops.exit_hctx && hctx->sched_data) {
			e->type->ops.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
		flags = hctx->flags;
	}

	mutex_lock(&q->debugfs_mutex);
	blk_mq_debugfs_unregister_sched(q);
	mutex_unlock(&q->debugfs_mutex);

	if (e->type->ops.exit_sched)
		e->type->ops.exit_sched(e);
	blk_mq_sched_tags_teardown(q, flags);
	q->elevator = NULL;
}