18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Moving/copying garbage collector
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright 2012 Google, Inc.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "bcache.h"
98c2ecf20Sopenharmony_ci#include "btree.h"
108c2ecf20Sopenharmony_ci#include "debug.h"
118c2ecf20Sopenharmony_ci#include "request.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <trace/events/bcache.h>
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_cistruct moving_io {
168c2ecf20Sopenharmony_ci	struct closure		cl;
178c2ecf20Sopenharmony_ci	struct keybuf_key	*w;
188c2ecf20Sopenharmony_ci	struct data_insert_op	op;
198c2ecf20Sopenharmony_ci	struct bbio		bio;
208c2ecf20Sopenharmony_ci};
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_cistatic bool moving_pred(struct keybuf *buf, struct bkey *k)
238c2ecf20Sopenharmony_ci{
248c2ecf20Sopenharmony_ci	struct cache_set *c = container_of(buf, struct cache_set,
258c2ecf20Sopenharmony_ci					   moving_gc_keys);
268c2ecf20Sopenharmony_ci	unsigned int i;
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci	for (i = 0; i < KEY_PTRS(k); i++)
298c2ecf20Sopenharmony_ci		if (ptr_available(c, k, i) &&
308c2ecf20Sopenharmony_ci		    GC_MOVE(PTR_BUCKET(c, k, i)))
318c2ecf20Sopenharmony_ci			return true;
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	return false;
348c2ecf20Sopenharmony_ci}
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci/* Moving GC - IO loop */
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_cistatic void moving_io_destructor(struct closure *cl)
398c2ecf20Sopenharmony_ci{
408c2ecf20Sopenharmony_ci	struct moving_io *io = container_of(cl, struct moving_io, cl);
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci	kfree(io);
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_cistatic void write_moving_finish(struct closure *cl)
468c2ecf20Sopenharmony_ci{
478c2ecf20Sopenharmony_ci	struct moving_io *io = container_of(cl, struct moving_io, cl);
488c2ecf20Sopenharmony_ci	struct bio *bio = &io->bio.bio;
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	bio_free_pages(bio);
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	if (io->op.replace_collision)
538c2ecf20Sopenharmony_ci		trace_bcache_gc_copy_collision(&io->w->key);
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	bch_keybuf_del(&io->op.c->moving_gc_keys, io->w);
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	up(&io->op.c->moving_in_flight);
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	closure_return_with_destructor(cl, moving_io_destructor);
608c2ecf20Sopenharmony_ci}
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_cistatic void read_moving_endio(struct bio *bio)
638c2ecf20Sopenharmony_ci{
648c2ecf20Sopenharmony_ci	struct bbio *b = container_of(bio, struct bbio, bio);
658c2ecf20Sopenharmony_ci	struct moving_io *io = container_of(bio->bi_private,
668c2ecf20Sopenharmony_ci					    struct moving_io, cl);
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	if (bio->bi_status)
698c2ecf20Sopenharmony_ci		io->op.status = bio->bi_status;
708c2ecf20Sopenharmony_ci	else if (!KEY_DIRTY(&b->key) &&
718c2ecf20Sopenharmony_ci		 ptr_stale(io->op.c, &b->key, 0)) {
728c2ecf20Sopenharmony_ci		io->op.status = BLK_STS_IOERR;
738c2ecf20Sopenharmony_ci	}
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
768c2ecf20Sopenharmony_ci}
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_cistatic void moving_init(struct moving_io *io)
798c2ecf20Sopenharmony_ci{
808c2ecf20Sopenharmony_ci	struct bio *bio = &io->bio.bio;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	bio_init(bio, bio->bi_inline_vecs,
838c2ecf20Sopenharmony_ci		 DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS));
848c2ecf20Sopenharmony_ci	bio_get(bio);
858c2ecf20Sopenharmony_ci	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size	= KEY_SIZE(&io->w->key) << 9;
888c2ecf20Sopenharmony_ci	bio->bi_private		= &io->cl;
898c2ecf20Sopenharmony_ci	bch_bio_map(bio, NULL);
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_cistatic void write_moving(struct closure *cl)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	struct moving_io *io = container_of(cl, struct moving_io, cl);
958c2ecf20Sopenharmony_ci	struct data_insert_op *op = &io->op;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	if (!op->status) {
988c2ecf20Sopenharmony_ci		moving_init(io);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
1018c2ecf20Sopenharmony_ci		op->write_prio		= 1;
1028c2ecf20Sopenharmony_ci		op->bio			= &io->bio.bio;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci		op->writeback		= KEY_DIRTY(&io->w->key);
1058c2ecf20Sopenharmony_ci		op->csum		= KEY_CSUM(&io->w->key);
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci		bkey_copy(&op->replace_key, &io->w->key);
1088c2ecf20Sopenharmony_ci		op->replace		= true;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci		closure_call(&op->cl, bch_data_insert, NULL, cl);
1118c2ecf20Sopenharmony_ci	}
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	continue_at(cl, write_moving_finish, op->wq);
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic void read_moving_submit(struct closure *cl)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	struct moving_io *io = container_of(cl, struct moving_io, cl);
1198c2ecf20Sopenharmony_ci	struct bio *bio = &io->bio.bio;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	continue_at(cl, write_moving, io->op.wq);
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic void read_moving(struct cache_set *c)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	struct keybuf_key *w;
1298c2ecf20Sopenharmony_ci	struct moving_io *io;
1308c2ecf20Sopenharmony_ci	struct bio *bio;
1318c2ecf20Sopenharmony_ci	struct closure cl;
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	closure_init_stack(&cl);
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	/* XXX: if we error, background writeback could stall indefinitely */
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
1388c2ecf20Sopenharmony_ci		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
1398c2ecf20Sopenharmony_ci					   &MAX_KEY, moving_pred);
1408c2ecf20Sopenharmony_ci		if (!w)
1418c2ecf20Sopenharmony_ci			break;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci		if (ptr_stale(c, &w->key, 0)) {
1448c2ecf20Sopenharmony_ci			bch_keybuf_del(&c->moving_gc_keys, w);
1458c2ecf20Sopenharmony_ci			continue;
1468c2ecf20Sopenharmony_ci		}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci		io = kzalloc(struct_size(io, bio.bio.bi_inline_vecs,
1498c2ecf20Sopenharmony_ci					 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)),
1508c2ecf20Sopenharmony_ci			     GFP_KERNEL);
1518c2ecf20Sopenharmony_ci		if (!io)
1528c2ecf20Sopenharmony_ci			goto err;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci		w->private	= io;
1558c2ecf20Sopenharmony_ci		io->w		= w;
1568c2ecf20Sopenharmony_ci		io->op.inode	= KEY_INODE(&w->key);
1578c2ecf20Sopenharmony_ci		io->op.c	= c;
1588c2ecf20Sopenharmony_ci		io->op.wq	= c->moving_gc_wq;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci		moving_init(io);
1618c2ecf20Sopenharmony_ci		bio = &io->bio.bio;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci		bio_set_op_attrs(bio, REQ_OP_READ, 0);
1648c2ecf20Sopenharmony_ci		bio->bi_end_io	= read_moving_endio;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
1678c2ecf20Sopenharmony_ci			goto err;
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		trace_bcache_gc_copy(&w->key);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci		down(&c->moving_in_flight);
1728c2ecf20Sopenharmony_ci		closure_call(&io->cl, read_moving_submit, NULL, &cl);
1738c2ecf20Sopenharmony_ci	}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	if (0) {
1768c2ecf20Sopenharmony_cierr:		if (!IS_ERR_OR_NULL(w->private))
1778c2ecf20Sopenharmony_ci			kfree(w->private);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci		bch_keybuf_del(&c->moving_gc_keys, w);
1808c2ecf20Sopenharmony_ci	}
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	closure_sync(&cl);
1838c2ecf20Sopenharmony_ci}
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_cistatic bool bucket_cmp(struct bucket *l, struct bucket *r)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
1888c2ecf20Sopenharmony_ci}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_cistatic unsigned int bucket_heap_top(struct cache *ca)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	struct bucket *b;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_civoid bch_moving_gc(struct cache_set *c)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	struct cache *ca = c->cache;
2008c2ecf20Sopenharmony_ci	struct bucket *b;
2018c2ecf20Sopenharmony_ci	unsigned long sectors_to_move, reserve_sectors;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	if (!c->copy_gc_enabled)
2048c2ecf20Sopenharmony_ci		return;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	mutex_lock(&c->bucket_lock);
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	sectors_to_move = 0;
2098c2ecf20Sopenharmony_ci	reserve_sectors = ca->sb.bucket_size *
2108c2ecf20Sopenharmony_ci			     fifo_used(&ca->free[RESERVE_MOVINGGC]);
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	ca->heap.used = 0;
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	for_each_bucket(b, ca) {
2158c2ecf20Sopenharmony_ci		if (GC_MARK(b) == GC_MARK_METADATA ||
2168c2ecf20Sopenharmony_ci		    !GC_SECTORS_USED(b) ||
2178c2ecf20Sopenharmony_ci		    GC_SECTORS_USED(b) == ca->sb.bucket_size ||
2188c2ecf20Sopenharmony_ci		    atomic_read(&b->pin))
2198c2ecf20Sopenharmony_ci			continue;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci		if (!heap_full(&ca->heap)) {
2228c2ecf20Sopenharmony_ci			sectors_to_move += GC_SECTORS_USED(b);
2238c2ecf20Sopenharmony_ci			heap_add(&ca->heap, b, bucket_cmp);
2248c2ecf20Sopenharmony_ci		} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
2258c2ecf20Sopenharmony_ci			sectors_to_move -= bucket_heap_top(ca);
2268c2ecf20Sopenharmony_ci			sectors_to_move += GC_SECTORS_USED(b);
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci			ca->heap.data[0] = b;
2298c2ecf20Sopenharmony_ci			heap_sift(&ca->heap, 0, bucket_cmp);
2308c2ecf20Sopenharmony_ci		}
2318c2ecf20Sopenharmony_ci	}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	while (sectors_to_move > reserve_sectors) {
2348c2ecf20Sopenharmony_ci		heap_pop(&ca->heap, b, bucket_cmp);
2358c2ecf20Sopenharmony_ci		sectors_to_move -= GC_SECTORS_USED(b);
2368c2ecf20Sopenharmony_ci	}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	while (heap_pop(&ca->heap, b, bucket_cmp))
2398c2ecf20Sopenharmony_ci		SET_GC_MOVE(b, 1);
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	mutex_unlock(&c->bucket_lock);
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci	c->moving_gc_keys.last_scanned = ZERO_KEY;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	read_moving(c);
2468c2ecf20Sopenharmony_ci}
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_civoid bch_moving_init_cache_set(struct cache_set *c)
2498c2ecf20Sopenharmony_ci{
2508c2ecf20Sopenharmony_ci	bch_keybuf_init(&c->moving_gc_keys);
2518c2ecf20Sopenharmony_ci	sema_init(&c->moving_in_flight, 64);
2528c2ecf20Sopenharmony_ci}
253