Lines Matching refs:blkg
68 * New IO stats are stored in the percpu iostat_cpu within blkcg_gq (blkg).
69 * There are multiple blkg's (one for each block device) attached to each
71 * but it doesn't know which blkg has the updated stats. If there are many
72 * block devices in a system, the cost of iterating all the blkg's to flush
78 * References to blkg are gotten and then put back in the process to
79 * protect against blkg removal.
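
The comment above (source lines 68-79) states the design constraint: every block device contributes a blkg with per-cpu stats, so a flusher that walked every blkg would scale poorly, and each blkg being flushed must be pinned with a reference so it cannot be freed mid-flush. One way to meet the first constraint, and roughly the approach recent kernels take, is to queue only the stat sets that were actually updated on a lockless list and have the flusher consume just that list. The sketch below is illustrative only: foo_stat_set, foo_stat_add and foo_stat_flush are hypothetical names, and the per-cpu placement, preemption and reference handling are omitted.

	#include <linux/llist.h>
	#include <linux/types.h>

	struct blkcg_gq;			/* declared in block/blk-cgroup.h */

	struct foo_stat_set {
		struct llist_node	lnode;	/* entry on the "updated" list */
		struct blkcg_gq		*blkg;	/* owner back-pointer, cf. source line 335 */
		bool			queued;
		u64			bytes;
	};

	/* hot path: bump the counter and queue the set at most once */
	static void foo_stat_add(struct llist_head *updated,
				 struct foo_stat_set *s, u64 bytes)
	{
		s->bytes += bytes;
		if (!s->queued) {
			s->queued = true;
			llist_add(&s->lnode, updated);
		}
	}

	/* flush path: visit only the sets that were actually updated */
	static void foo_stat_flush(struct llist_head *updated)
	{
		struct llist_node *list = llist_del_all(updated);
		struct foo_stat_set *s, *tmp;

		llist_for_each_entry_safe(s, tmp, list, lnode) {
			s->queued = false;
			/* fold s->bytes into s->blkg's aggregate stats here */
		}
	}
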
121 struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
123 struct request_queue *q = blkg->q;
129 * of the list blkg->q_node is delayed to here from blkg_destroy(), and
135 if (blkg->pd[i])
136 blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
137 if (blkg->parent)
138 blkg_put(blkg->parent);
140 list_del_init(&blkg->q_node);
145 free_percpu(blkg->iostat_cpu);
146 percpu_ref_exit(&blkg->refcnt);
147 kfree(blkg);
151 * blkg_free - free a blkg
152 * @blkg: blkg to free
154 * Free @blkg which may be partially allocated.
156 static void blkg_free(struct blkcg_gq *blkg)
158 if (!blkg)
165 INIT_WORK(&blkg->free_work, blkg_free_workfn);
166 schedule_work(&blkg->free_work);
171 struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
172 struct blkcg *blkcg = blkg->blkcg;
176 WARN_ON(!bio_list_empty(&blkg->async_bios));
182 * blkg_stat_lock is for serializing blkg stat updates
187 /* release the blkcg and parent blkg refs this blkg has been holding */
188 css_put(&blkg->blkcg->css);
189 blkg_free(blkg);
194 * can access all the fields of blkg and assume these are valid. For
197 * Having a reference to blkg under an rcu allows accesses to only values
202 struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
204 call_rcu(&blkg->rcu_head, __blkg_release);
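
The two comments above (source lines 194-204) describe the lookup contract: holding only the RCU read lock keeps the blkg memory from disappearing, but a reference must be taken before relying on anything beyond blkg-local values. The reader-side shape, mirrored later in this listing by blkcg_maybe_throttle_current() (source lines 1915-1936), is roughly the sketch below; blkg_lookup(), blkg_tryget() and blkg_put() live in the private header block/blk-cgroup.h, and blkcg and disk are assumed to be valid and in scope.

	struct blkcg_gq *blkg;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, disk->queue);
	if (blkg && blkg_tryget(blkg)) {
		/* blkg-local state (stats, limits, delay counters) is safe here */
		blkg_put(blkg);
	}
	rcu_read_unlock();
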
212 struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
219 /* as long as there are pending bios, @blkg can't go away */
220 spin_lock(&blkg->async_bio_lock);
221 bio_list_merge(&bios, &blkg->async_bios);
222 bio_list_init(&blkg->async_bios);
223 spin_unlock(&blkg->async_bio_lock);
244 struct blkcg_gq *blkg = bio->bi_blkg;
246 if (blkg->parent) {
247 spin_lock(&blkg->async_bio_lock);
248 bio_list_add(&blkg->async_bios, bio);
249 spin_unlock(&blkg->async_bio_lock);
250 queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
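
Source lines 212-250 above are the two halves of a standard punt-to-workqueue pattern: the submission path appends the bio to a per-blkg list under a spinlock and queues work, and the worker splices the whole list under the same lock, then submits the bios with the lock dropped. Assembled in one place, the shape is roughly the following sketch; foo_ctx and the foo_* helpers are hypothetical, the fields are assumed to be initialized elsewhere (spin_lock_init(), bio_list_init(), INIT_WORK()), and the real code uses its own dedicated workqueue (blkcg_punt_bio_wq) rather than system_wq.

	#include <linux/bio.h>
	#include <linux/spinlock.h>
	#include <linux/workqueue.h>

	struct foo_ctx {
		spinlock_t		lock;
		struct bio_list		bios;
		struct work_struct	work;
	};

	static void foo_punt_workfn(struct work_struct *work)
	{
		struct foo_ctx *ctx = container_of(work, struct foo_ctx, work);
		struct bio_list bios;
		struct bio *bio;

		bio_list_init(&bios);
		spin_lock(&ctx->lock);
		bio_list_merge(&bios, &ctx->bios);	/* splice under the lock */
		bio_list_init(&ctx->bios);
		spin_unlock(&ctx->lock);

		while ((bio = bio_list_pop(&bios)))
			submit_bio_noacct(bio);		/* submit outside the lock */
	}

	static void foo_punt(struct foo_ctx *ctx, struct bio *bio)
	{
		spin_lock(&ctx->lock);
		bio_list_add(&ctx->bios, bio);
		spin_unlock(&ctx->lock);
		queue_work(system_wq, &ctx->work);
	}
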
298 * blkg_alloc - allocate a blkg
299 * @blkcg: block cgroup the new blkg is associated with
300 * @disk: gendisk the new blkg is associated with
303 * Allocate a new blkg associating @blkcg and @disk.
308 struct blkcg_gq *blkg;
312 blkg = kzalloc_node(sizeof(*blkg), gfp_mask, disk->queue->node);
313 if (!blkg)
315 if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
317 blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
318 if (!blkg->iostat_cpu)
323 blkg->q = disk->queue;
324 INIT_LIST_HEAD(&blkg->q_node);
325 blkg->blkcg = blkcg;
327 spin_lock_init(&blkg->async_bio_lock);
328 bio_list_init(&blkg->async_bios);
329 INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
332 u64_stats_init(&blkg->iostat.sync);
334 u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
335 per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg;
345 /* alloc per-policy data and attach it to blkg */
349 blkg->pd[i] = pd;
350 pd->blkg = blkg;
355 return blkg;
359 if (blkg->pd[i])
360 blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
363 free_percpu(blkg->iostat_cpu);
365 percpu_ref_exit(&blkg->refcnt);
367 kfree(blkg);
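
Source lines 298-367 above show the allocation half of the blkg lifetime: percpu_ref_init() is handed blkg_release as the callback for the final put, and the error path unwinds with percpu_ref_exit() and kfree(). Together with the release path at source lines 121-204, the overall percpu_ref plus RCU-deferred-free shape looks roughly like the hypothetical sketch below (the real blkg additionally defers the free to a workqueue via free_work because the final free path may need to sleep):

	#include <linux/percpu-refcount.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct percpu_ref	refcnt;
		struct rcu_head		rcu_head;
	};

	static void foo_free_rcu(struct rcu_head *rcu)
	{
		struct foo *foo = container_of(rcu, struct foo, rcu_head);

		percpu_ref_exit(&foo->refcnt);
		kfree(foo);
	}

	/* called when the last reference is put */
	static void foo_release(struct percpu_ref *ref)
	{
		struct foo *foo = container_of(ref, struct foo, refcnt);

		call_rcu(&foo->rcu_head, foo_free_rcu);
	}

	static struct foo *foo_alloc(gfp_t gfp)
	{
		struct foo *foo = kzalloc(sizeof(*foo), gfp);

		if (!foo)
			return NULL;
		if (percpu_ref_init(&foo->refcnt, foo_release, 0, gfp)) {
			kfree(foo);
			return NULL;
		}
		return foo;
	}

	/*
	 * Teardown mirrors blkg_destroy(): percpu_ref_kill(&foo->refcnt)
	 * drops the initial reference and switches the ref to atomic mode;
	 * foo_release() then fires once the last outstanding reference is
	 * put, and the RCU grace period keeps lockless readers safe until
	 * the memory is actually freed.
	 */
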
378 struct blkcg_gq *blkg;
383 /* request_queue is dying, do not create/recreate a blkg */
389 /* blkg holds a reference to blkcg */
403 blkg = new_blkg;
407 blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue);
408 if (WARN_ON_ONCE(!blkg->parent)) {
412 blkg_get(blkg->parent);
419 if (blkg->pd[i] && pol->pd_init_fn)
420 pol->pd_init_fn(blkg->pd[i]);
425 ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg);
427 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
428 list_add(&blkg->q_node, &disk->queue->blkg_list);
433 if (blkg->pd[i]) {
435 pol->pd_online_fn(blkg->pd[i]);
436 blkg->pd[i]->online = true;
440 blkg->online = true;
444 return blkg;
446 /* @blkg failed to be fully initialized, use the usual release path */
447 blkg_put(blkg);
459 * blkg_lookup_create - lookup blkg, try to create one if not there
463 * Lookup blkg for the @blkcg - @disk pair. If it doesn't exist, try to
464 * create one. blkg creation is performed recursively from blkcg_root such
465 * that all non-root blkg's have access to the parent blkg. This function
468 * Returns the blkg or the closest blkg if blkg_create() fails as it walks
475 struct blkcg_gq *blkg;
480 blkg = blkg_lookup(blkcg, q);
481 if (blkg)
482 return blkg;
485 blkg = blkg_lookup(blkcg, q);
486 if (blkg) {
488 blkg != rcu_dereference(blkcg->blkg_hint))
489 rcu_assign_pointer(blkcg->blkg_hint, blkg);
496 * blkg to the intended blkg should blkg_create() fail.
504 blkg = blkg_lookup(parent, q);
505 if (blkg) {
506 /* remember closest blkg */
507 ret_blkg = blkg;
514 blkg = blkg_create(pos, disk, NULL);
515 if (IS_ERR(blkg)) {
516 blkg = ret_blkg;
525 return blkg;
528 static void blkg_destroy(struct blkcg_gq *blkg)
530 struct blkcg *blkcg = blkg->blkcg;
533 lockdep_assert_held(&blkg->q->queue_lock);
537 * blkg stays on the queue list until blkg_free_workfn(), see details in
542 if (hlist_unhashed(&blkg->blkcg_node))
548 if (blkg->pd[i] && blkg->pd[i]->online) {
549 blkg->pd[i]->online = false;
551 pol->pd_offline_fn(blkg->pd[i]);
555 blkg->online = false;
557 radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
558 hlist_del_init_rcu(&blkg->blkcg_node);
561 * Both setting lookup hint to and clearing it from @blkg are done
562 * under queue_lock. If it's not pointing to @blkg now, it never
565 if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
572 percpu_ref_kill(&blkg->refcnt);
578 struct blkcg_gq *blkg, *n;
584 list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
585 struct blkcg *blkcg = blkg->blkcg;
587 if (hlist_unhashed(&blkg->blkcg_node))
591 blkg_destroy(blkg);
626 struct blkcg_gq *blkg;
637 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
640 per_cpu_ptr(blkg->iostat_cpu, cpu);
645 bis->blkg = blkg;
647 memset(&blkg->iostat, 0, sizeof(blkg->iostat));
648 u64_stats_init(&blkg->iostat.sync);
653 if (blkg->pd[i] && pol->pd_reset_stats_fn)
654 pol->pd_reset_stats_fn(blkg->pd[i]);
663 const char *blkg_dev_name(struct blkcg_gq *blkg)
665 if (!blkg->q->disk)
667 return bdi_dev_name(blkg->q->disk->bdi);
671 * blkcg_print_blkgs - helper for printing per-blkg data
674 * @prfill: fill function to print out a blkg
679 * This function invokes @prfill on each blkg of @blkcg if pd for the
694 struct blkcg_gq *blkg;
698 hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
699 spin_lock_irq(&blkg->q->queue_lock);
700 if (blkcg_policy_enabled(blkg->q, pol))
701 total += prfill(sf, blkg->pd[pol->plid], data);
702 spin_unlock_irq(&blkg->q->queue_lock);
721 const char *dname = blkg_dev_name(pd->blkg);
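
Source lines 663-721 above are the plumbing a policy's read path builds on: blkcg_print_blkgs() walks the blkcg's blkg_list under RCU, takes each blkg's queue_lock, and invokes the supplied prfill callback on the policy's per-blkg data, while blkg_dev_name() supplies the device name. A hedged sketch of a caller follows; blkcg_policy_foo and the foo_* functions are purely hypothetical, and the helpers themselves are declared in the private header block/blk-cgroup.h.

	static u64 foo_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
	{
		const char *dname = blkg_dev_name(pd->blkg);

		if (dname)
			seq_printf(sf, "%s %llu\n", dname,
				   0ULL /* policy-specific value for this blkg */);
		return 0;
	}

	/* wired up as a cftype ->seq_show callback */
	static int foo_print(struct seq_file *sf, void *v)
	{
		blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), foo_prfill,
				  &blkcg_policy_foo, 0, false);
		return 0;
	}
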
736 * Initialize @ctx which can be used to parse blkg config input string @input.
747 * blkg_conf_open_bdev - parse and open bdev for per-blkg config update
750 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
798 * blkg_conf_prep - parse and prepare for per-blkg config update
803 * Parse per-blkg config update from @ctx->input and initialize @ctx
806 * @ctx->blkg to the blkg being configured.
818 struct blkcg_gq *blkg;
843 blkg = blkg_lookup(blkcg, q);
844 if (blkg)
862 /* Drop locks to do new blkg allocation with GFP_KERNEL. */
885 blkg = blkg_lookup(pos, q);
886 if (blkg) {
889 blkg = blkg_create(pos, disk, new_blkg);
890 if (IS_ERR(blkg)) {
891 ret = PTR_ERR(blkg);
903 ctx->blkg = blkg;
928 * blkg_conf_exit - clean up per-blkg config update
931 * Clean up after per-blkg config update. This function must be called on all
938 if (ctx->blkg) {
940 ctx->blkg = NULL;
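
Source lines 736-940 above define the configuration-write helper trio: blkg_conf_init() records the input string, blkg_conf_prep() parses the MAJ:MIN prefix, opens the bdev, looks up or creates the blkg, and points ctx->blkg (and the remaining input, ctx->body) at the result, and blkg_conf_exit() cleans up on every path. A policy's cftype ->write handler therefore typically has roughly the shape below; blkcg_policy_foo and foo_set_limit are hypothetical, and the helpers live in the private header block/blk-cgroup.h.

	static ssize_t foo_set_limit(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, loff_t off)
	{
		struct blkcg *blkcg = css_to_blkcg(of_css(of));
		struct blkg_conf_ctx ctx;
		u64 v;
		int ret;

		blkg_conf_init(&ctx, buf);

		ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, &ctx);
		if (ret)
			goto out;

		ret = -EINVAL;
		if (sscanf(ctx.body, "%llu", &v) != 1)
			goto out;

		/* ctx.blkg is the blkg being configured; apply v to its pd here */
		ret = 0;
	out:
		blkg_conf_exit(&ctx);
		return ret ?: nbytes;
	}
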
982 static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
989 flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
992 blkg_iostat_add(&blkg->iostat.cur, &delta);
994 u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
1011 * To cover concurrent parent blkg updates from blkg_release().
1022 struct blkcg_gq *blkg = bisc->blkg;
1023 struct blkcg_gq *parent = blkg->parent;
1035 blkcg_iostat_update(blkg, &cur, &bisc->last);
1039 blkcg_iostat_update(parent, &blkg->iostat.cur,
1040 &blkg->iostat.last);
1074 struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg;
1099 flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
1100 blkg_iostat_set(&blkg->iostat.cur, &tmp);
1101 u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
1105 static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
1107 struct blkg_iostat_set *bis = &blkg->iostat;
1113 if (!blkg->online)
1116 dname = blkg_dev_name(blkg);
1139 if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
1141 atomic_read(&blkg->use_delay),
1142 atomic64_read(&blkg->delay_nsec));
1148 if (!blkg->pd[i] || !pol->pd_stat_fn)
1151 pol->pd_stat_fn(blkg->pd[i], s);
1160 struct blkcg_gq *blkg;
1168 hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
1169 spin_lock_irq(&blkg->q->queue_lock);
1170 blkcg_print_one_stat(blkg, sf);
1171 spin_unlock_irq(&blkg->q->queue_lock);
1204 * which offlines writeback. Here we tie the next stage of blkg destruction
1212 * the blkg is put back eventually allowing blkcg_css_free() to be called.
1214 * workqueue. Any submitted ios that fail to get the blkg ref will be
1239 struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
1241 struct request_queue *q = blkg->q;
1255 blkg_destroy(blkg);
1267 * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
1279 * This is primarily used to impedance-match blkg and cgwb lifetimes so
1280 * that blkg doesn't go offline while an associated cgwb is still active.
1415 struct blkcg_gq *new_blkg, *blkg;
1428 /* Make sure the root blkg exists. */
1431 blkg = blkg_create(&blkcg_root, disk, new_blkg);
1432 if (IS_ERR(blkg))
1434 q->root_blkg = blkg;
1459 return PTR_ERR(blkg);
1505 * from IO path. Update of each blkg is protected by both queue and blkcg
1516 struct blkcg_gq *blkg, *pinned_blkg = NULL;
1528 list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
1531 if (blkg->pd[pol->plid])
1535 if (blkg == pinned_blkg) {
1539 pd = pol->pd_alloc_fn(disk, blkg->blkcg,
1546 * prealloc for @blkg w/ GFP_KERNEL.
1550 blkg_get(blkg);
1551 pinned_blkg = blkg;
1557 pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
1565 spin_lock(&blkg->blkcg->lock);
1567 pd->blkg = blkg;
1569 blkg->pd[pol->plid] = pd;
1578 spin_unlock(&blkg->blkcg->lock);
1597 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1598 struct blkcg *blkcg = blkg->blkcg;
1602 pd = blkg->pd[pol->plid];
1608 blkg->pd[pol->plid] = NULL;
1630 struct blkcg_gq *blkg;
1643 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1644 struct blkcg *blkcg = blkg->blkcg;
1647 if (blkg->pd[pol->plid]) {
1648 if (blkg->pd[pol->plid]->online && pol->pd_offline_fn)
1649 pol->pd_offline_fn(blkg->pd[pol->plid]);
1650 pol->pd_free_fn(blkg->pd[pol->plid]);
1651 blkg->pd[pol->plid] = NULL;
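
Source lines 1505-1651 above show how a policy's per-blkg data is attached and torn down through the pd_* hooks: blkcg_activate_policy() allocates a pd for every existing blkg with pd_alloc_fn (falling back to a GFP_KERNEL preallocation) and links it via pd->blkg and blkg->pd[plid], while blkcg_deactivate_policy() runs pd_offline_fn and pd_free_fn. A minimal, hypothetical policy wiring just the allocation hooks might look like the sketch below (blkcg_policy_foo and struct foo_pd are invented for illustration; real policies also fill in cftypes and the other pd_* callbacks seen above).

	struct foo_pd {
		struct blkg_policy_data	pd;	/* embedded so blkg->pd[] can point at it */
		u64			limit;
	};

	static struct blkg_policy_data *foo_pd_alloc(struct gendisk *disk,
						     struct blkcg *blkcg, gfp_t gfp)
	{
		struct foo_pd *fp = kzalloc_node(sizeof(*fp), gfp, disk->queue->node);

		return fp ? &fp->pd : NULL;
	}

	static void foo_pd_free(struct blkg_policy_data *pd)
	{
		kfree(container_of(pd, struct foo_pd, pd));
	}

	static struct blkcg_policy blkcg_policy_foo = {
		.pd_alloc_fn	= foo_pd_alloc,
		.pd_free_fn	= foo_pd_free,
	};

	/* registered once with blkcg_policy_register(&blkcg_policy_foo),
	 * then enabled per disk via blkcg_activate_policy(). */
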
1788 static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
1790 u64 old = atomic64_read(&blkg->delay_start);
1793 if (atomic_read(&blkg->use_delay) < 0)
1802 * blkg->last_delay so we know what amount is still left to be charged
1803 * to the blkg from this point onward. blkg->last_use keeps track of
1804 * the use_delay counter. The idea is if we're unthrottling the blkg we
1810 atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) {
1811 u64 cur = atomic64_read(&blkg->delay_nsec);
1812 u64 sub = min_t(u64, blkg->last_delay, now - old);
1813 int cur_use = atomic_read(&blkg->use_delay);
1819 if (cur_use < blkg->last_use)
1820 sub = max_t(u64, sub, blkg->last_delay >> 1);
1829 atomic64_set(&blkg->delay_nsec, 0);
1830 blkg->last_delay = 0;
1832 atomic64_sub(sub, &blkg->delay_nsec);
1833 blkg->last_delay = cur - sub;
1835 blkg->last_use = cur_use;
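
To make the scaling above concrete with illustrative numbers: suppose blkg->last_delay is 8 ms of outstanding delay, 5 ms of wall time has passed since delay_start (now - old = 5 ms), and use_delay has not dropped since the last scaling. Then sub = min(8 ms, 5 ms) = 5 ms is forgiven: delay_nsec is reduced by 5 ms and last_delay becomes cur - 5 ms. If use_delay had dropped (cur_use < last_use), sub would be raised to at least last_delay / 2 = 4 ms so that an unthrottled blkg sheds its accumulated delay faster, and whenever sub covers the whole remaining delay the counters are simply cleared.
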
1845 static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
1854 while (blkg->parent) {
1855 int use_delay = atomic_read(&blkg->use_delay);
1860 blkcg_scale_delay(blkg, now);
1861 this_delay = atomic64_read(&blkg->delay_nsec);
1867 blkg = blkg->parent;
1915 struct blkcg_gq *blkg;
1928 blkg = blkg_lookup(blkcg, disk->queue);
1929 if (!blkg)
1931 if (!blkg_tryget(blkg))
1935 blkcg_maybe_throttle_blkg(blkg, use_memdelay);
1936 blkg_put(blkg);
1949 * for the blkg for this task. We do not pass the blkg because there are places
1981 * blkcg_add_delay - add delay to this blkg
1982 * @blkg: blkg of interest
1986 * Charge @delta to the blkg's current delay accumulation. This is used to
1989 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
1991 if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
1993 blkcg_scale_delay(blkg, now);
1994 atomic64_add(delta, &blkg->delay_nsec);
1998 * blkg_tryget_closest - try to get a blkg ref on the closest blkg
2002 * As the failure mode here is to walk up the blkg tree, this ensures that the
2003 * blkg->parent pointers are always valid. This returns the blkg that it ended
2009 struct blkcg_gq *blkg, *ret_blkg = NULL;
2012 blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_bdev->bd_disk);
2013 while (blkg) {
2014 if (blkg_tryget(blkg)) {
2015 ret_blkg = blkg;
2018 blkg = blkg->parent;
2030 * Associate @bio with the blkg found by combining the css's blkg and the
2032 * the blkg tree. Therefore, the blkg associated can be anything between @blkg
2034 * then the remaining bios will spill to the closest alive blkg.
2036 * A reference will be taken on the blkg and will be released when @bio is
2055 * bio_associate_blkg - associate a bio with a blkg
2058 * Associate @bio with the blkg found from the bio's css and request_queue.
2059 * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is
2081 * bio_clone_blkg_association - clone blkg association from src to dst bio