18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2014 Facebook. All rights reserved.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This file is released under the GPL.
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include <linux/device-mapper.h>
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/module.h>
108c2ecf20Sopenharmony_ci#include <linux/init.h>
118c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
128c2ecf20Sopenharmony_ci#include <linux/bio.h>
138c2ecf20Sopenharmony_ci#include <linux/dax.h>
148c2ecf20Sopenharmony_ci#include <linux/slab.h>
158c2ecf20Sopenharmony_ci#include <linux/kthread.h>
168c2ecf20Sopenharmony_ci#include <linux/freezer.h>
178c2ecf20Sopenharmony_ci#include <linux/uio.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX "log-writes"
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci/*
228c2ecf20Sopenharmony_ci * This target will sequentially log all writes to the target device onto the
238c2ecf20Sopenharmony_ci * log device.  This is helpful for replaying writes to check for fs consistency
248c2ecf20Sopenharmony_ci * at all times.  This target provides a mechanism to mark specific events to
258c2ecf20Sopenharmony_ci * check data at a later time.  So for example you would:
268c2ecf20Sopenharmony_ci *
278c2ecf20Sopenharmony_ci * write data
288c2ecf20Sopenharmony_ci * fsync
298c2ecf20Sopenharmony_ci * dmsetup message /dev/whatever mark mymark
308c2ecf20Sopenharmony_ci * unmount /mnt/test
318c2ecf20Sopenharmony_ci *
328c2ecf20Sopenharmony_ci * Then replay the log up to mymark and check the contents of the replay to
338c2ecf20Sopenharmony_ci * verify it matches what was written.
348c2ecf20Sopenharmony_ci *
 * We log writes only after they have been flushed; this makes the log describe
 * close to the order in which the data hits the actual disk, not its cache.  So
 * for example the following sequence (W means write, C means complete)
388c2ecf20Sopenharmony_ci *
398c2ecf20Sopenharmony_ci * Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd
408c2ecf20Sopenharmony_ci *
418c2ecf20Sopenharmony_ci * Would result in the log looking like this:
428c2ecf20Sopenharmony_ci *
438c2ecf20Sopenharmony_ci * c,a,b,flush,fuad,<other writes>,<next flush>
448c2ecf20Sopenharmony_ci *
458c2ecf20Sopenharmony_ci * This is meant to help expose problems where file systems do not properly wait
468c2ecf20Sopenharmony_ci * on data being written before invoking a FLUSH.  FUA bypasses cache so once it
478c2ecf20Sopenharmony_ci * completes it is added to the log as it should be on disk.
488c2ecf20Sopenharmony_ci *
498c2ecf20Sopenharmony_ci * We treat DISCARDs as if they don't bypass cache so that they are logged in
508c2ecf20Sopenharmony_ci * order of completion along with the normal writes.  If we didn't do it this
518c2ecf20Sopenharmony_ci * way we would process all the discards first and then write all the data, when
528c2ecf20Sopenharmony_ci * in fact we want to do the data and the discard in the order that they
538c2ecf20Sopenharmony_ci * completed.
548c2ecf20Sopenharmony_ci */
/*
 * Flag bits carried in pending_block.flags and written out, little-endian,
 * as log_write_entry.flags.
 */
#define LOG_FLUSH_FLAG		(1 << 0)
#define LOG_FUA_FLAG		(1 << 1)	/* entry also triggers a super update */
#define LOG_DISCARD_FLAG	(1 << 2)	/* entry has no data sectors in the log */
#define LOG_MARK_FLAG		(1 << 3)	/* entry created by log_mark() */
#define LOG_METADATA_FLAG	(1 << 4)

/* Values stored in the on-disk log_write_super. */
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL

/* Sector of the log device that holds the super. */
#define WRITE_LOG_SUPER_SECTOR 0
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci/*
668c2ecf20Sopenharmony_ci * The disk format for this is braindead simple.
678c2ecf20Sopenharmony_ci *
688c2ecf20Sopenharmony_ci * At byte 0 we have our super, followed by the following sequence for
698c2ecf20Sopenharmony_ci * nr_entries:
708c2ecf20Sopenharmony_ci *
718c2ecf20Sopenharmony_ci * [   1 sector    ][  entry->nr_sectors ]
728c2ecf20Sopenharmony_ci * [log_write_entry][    data written    ]
738c2ecf20Sopenharmony_ci *
748c2ecf20Sopenharmony_ci * The log_write_entry takes up a full sector so we can have arbitrary length
758c2ecf20Sopenharmony_ci * marks and it leaves us room for extra content in the future.
768c2ecf20Sopenharmony_ci */
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci/*
798c2ecf20Sopenharmony_ci * Basic info about the log for userspace.
808c2ecf20Sopenharmony_ci */
818c2ecf20Sopenharmony_cistruct log_write_super {
828c2ecf20Sopenharmony_ci	__le64 magic;
838c2ecf20Sopenharmony_ci	__le64 version;
848c2ecf20Sopenharmony_ci	__le64 nr_entries;
858c2ecf20Sopenharmony_ci	__le32 sectorsize;
868c2ecf20Sopenharmony_ci};
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/*
898c2ecf20Sopenharmony_ci * sector - the sector we wrote.
908c2ecf20Sopenharmony_ci * nr_sectors - the number of sectors we wrote.
918c2ecf20Sopenharmony_ci * flags - flags for this log entry.
928c2ecf20Sopenharmony_ci * data_len - the size of the data in this log entry, this is for private log
938c2ecf20Sopenharmony_ci * entry stuff, the MARK data provided by userspace for example.
948c2ecf20Sopenharmony_ci */
958c2ecf20Sopenharmony_cistruct log_write_entry {
968c2ecf20Sopenharmony_ci	__le64 sector;
978c2ecf20Sopenharmony_ci	__le64 nr_sectors;
988c2ecf20Sopenharmony_ci	__le64 flags;
998c2ecf20Sopenharmony_ci	__le64 data_len;
1008c2ecf20Sopenharmony_ci};
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_cistruct log_writes_c {
1038c2ecf20Sopenharmony_ci	struct dm_dev *dev;
1048c2ecf20Sopenharmony_ci	struct dm_dev *logdev;
1058c2ecf20Sopenharmony_ci	u64 logged_entries;
1068c2ecf20Sopenharmony_ci	u32 sectorsize;
1078c2ecf20Sopenharmony_ci	u32 sectorshift;
1088c2ecf20Sopenharmony_ci	atomic_t io_blocks;
1098c2ecf20Sopenharmony_ci	atomic_t pending_blocks;
1108c2ecf20Sopenharmony_ci	sector_t next_sector;
1118c2ecf20Sopenharmony_ci	sector_t end_sector;
1128c2ecf20Sopenharmony_ci	bool logging_enabled;
1138c2ecf20Sopenharmony_ci	bool device_supports_discard;
1148c2ecf20Sopenharmony_ci	spinlock_t blocks_lock;
1158c2ecf20Sopenharmony_ci	struct list_head unflushed_blocks;
1168c2ecf20Sopenharmony_ci	struct list_head logging_blocks;
1178c2ecf20Sopenharmony_ci	wait_queue_head_t wait;
1188c2ecf20Sopenharmony_ci	struct task_struct *log_kthread;
1198c2ecf20Sopenharmony_ci	struct completion super_done;
1208c2ecf20Sopenharmony_ci};
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_cistruct pending_block {
1238c2ecf20Sopenharmony_ci	int vec_cnt;
1248c2ecf20Sopenharmony_ci	u64 flags;
1258c2ecf20Sopenharmony_ci	sector_t sector;
1268c2ecf20Sopenharmony_ci	sector_t nr_sectors;
1278c2ecf20Sopenharmony_ci	char *data;
1288c2ecf20Sopenharmony_ci	u32 datalen;
1298c2ecf20Sopenharmony_ci	struct list_head list;
1308c2ecf20Sopenharmony_ci	struct bio_vec vecs[];
1318c2ecf20Sopenharmony_ci};
1328c2ecf20Sopenharmony_ci
/* Per-bio private area; its size is registered via ti->per_io_data_size. */
struct per_bio_data {
	struct pending_block *block;
};
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic inline sector_t bio_to_dev_sectors(struct log_writes_c *lc,
1388c2ecf20Sopenharmony_ci					  sector_t sectors)
1398c2ecf20Sopenharmony_ci{
1408c2ecf20Sopenharmony_ci	return sectors >> (lc->sectorshift - SECTOR_SHIFT);
1418c2ecf20Sopenharmony_ci}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_cistatic inline sector_t dev_to_bio_sectors(struct log_writes_c *lc,
1448c2ecf20Sopenharmony_ci					  sector_t sectors)
1458c2ecf20Sopenharmony_ci{
1468c2ecf20Sopenharmony_ci	return sectors << (lc->sectorshift - SECTOR_SHIFT);
1478c2ecf20Sopenharmony_ci}
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_cistatic void put_pending_block(struct log_writes_c *lc)
1508c2ecf20Sopenharmony_ci{
1518c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&lc->pending_blocks)) {
1528c2ecf20Sopenharmony_ci		smp_mb__after_atomic();
1538c2ecf20Sopenharmony_ci		if (waitqueue_active(&lc->wait))
1548c2ecf20Sopenharmony_ci			wake_up(&lc->wait);
1558c2ecf20Sopenharmony_ci	}
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cistatic void put_io_block(struct log_writes_c *lc)
1598c2ecf20Sopenharmony_ci{
1608c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&lc->io_blocks)) {
1618c2ecf20Sopenharmony_ci		smp_mb__after_atomic();
1628c2ecf20Sopenharmony_ci		if (waitqueue_active(&lc->wait))
1638c2ecf20Sopenharmony_ci			wake_up(&lc->wait);
1648c2ecf20Sopenharmony_ci	}
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_cistatic void log_end_io(struct bio *bio)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	struct log_writes_c *lc = bio->bi_private;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	if (bio->bi_status) {
1728c2ecf20Sopenharmony_ci		unsigned long flags;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci		DMERR("Error writing log block, error=%d", bio->bi_status);
1758c2ecf20Sopenharmony_ci		spin_lock_irqsave(&lc->blocks_lock, flags);
1768c2ecf20Sopenharmony_ci		lc->logging_enabled = false;
1778c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lc->blocks_lock, flags);
1788c2ecf20Sopenharmony_ci	}
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	bio_free_pages(bio);
1818c2ecf20Sopenharmony_ci	put_io_block(lc);
1828c2ecf20Sopenharmony_ci	bio_put(bio);
1838c2ecf20Sopenharmony_ci}
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_cistatic void log_end_super(struct bio *bio)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	struct log_writes_c *lc = bio->bi_private;
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	complete(&lc->super_done);
1908c2ecf20Sopenharmony_ci	log_end_io(bio);
1918c2ecf20Sopenharmony_ci}
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci/*
1948c2ecf20Sopenharmony_ci * Meant to be called if there is an error, it will free all the pages
1958c2ecf20Sopenharmony_ci * associated with the block.
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_cistatic void free_pending_block(struct log_writes_c *lc,
1988c2ecf20Sopenharmony_ci			       struct pending_block *block)
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	int i;
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	for (i = 0; i < block->vec_cnt; i++) {
2038c2ecf20Sopenharmony_ci		if (block->vecs[i].bv_page)
2048c2ecf20Sopenharmony_ci			__free_page(block->vecs[i].bv_page);
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci	kfree(block->data);
2078c2ecf20Sopenharmony_ci	kfree(block);
2088c2ecf20Sopenharmony_ci	put_pending_block(lc);
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic int write_metadata(struct log_writes_c *lc, void *entry,
2128c2ecf20Sopenharmony_ci			  size_t entrylen, void *data, size_t datalen,
2138c2ecf20Sopenharmony_ci			  sector_t sector)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	struct bio *bio;
2168c2ecf20Sopenharmony_ci	struct page *page;
2178c2ecf20Sopenharmony_ci	void *ptr;
2188c2ecf20Sopenharmony_ci	size_t ret;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	bio = bio_alloc(GFP_KERNEL, 1);
2218c2ecf20Sopenharmony_ci	if (!bio) {
2228c2ecf20Sopenharmony_ci		DMERR("Couldn't alloc log bio");
2238c2ecf20Sopenharmony_ci		goto error;
2248c2ecf20Sopenharmony_ci	}
2258c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = 0;
2268c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = sector;
2278c2ecf20Sopenharmony_ci	bio_set_dev(bio, lc->logdev->bdev);
2288c2ecf20Sopenharmony_ci	bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
2298c2ecf20Sopenharmony_ci			  log_end_super : log_end_io;
2308c2ecf20Sopenharmony_ci	bio->bi_private = lc;
2318c2ecf20Sopenharmony_ci	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
2348c2ecf20Sopenharmony_ci	if (!page) {
2358c2ecf20Sopenharmony_ci		DMERR("Couldn't alloc log page");
2368c2ecf20Sopenharmony_ci		bio_put(bio);
2378c2ecf20Sopenharmony_ci		goto error;
2388c2ecf20Sopenharmony_ci	}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	ptr = kmap_atomic(page);
2418c2ecf20Sopenharmony_ci	memcpy(ptr, entry, entrylen);
2428c2ecf20Sopenharmony_ci	if (datalen)
2438c2ecf20Sopenharmony_ci		memcpy(ptr + entrylen, data, datalen);
2448c2ecf20Sopenharmony_ci	memset(ptr + entrylen + datalen, 0,
2458c2ecf20Sopenharmony_ci	       lc->sectorsize - entrylen - datalen);
2468c2ecf20Sopenharmony_ci	kunmap_atomic(ptr);
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	ret = bio_add_page(bio, page, lc->sectorsize, 0);
2498c2ecf20Sopenharmony_ci	if (ret != lc->sectorsize) {
2508c2ecf20Sopenharmony_ci		DMERR("Couldn't add page to the log block");
2518c2ecf20Sopenharmony_ci		goto error_bio;
2528c2ecf20Sopenharmony_ci	}
2538c2ecf20Sopenharmony_ci	submit_bio(bio);
2548c2ecf20Sopenharmony_ci	return 0;
2558c2ecf20Sopenharmony_cierror_bio:
2568c2ecf20Sopenharmony_ci	bio_put(bio);
2578c2ecf20Sopenharmony_ci	__free_page(page);
2588c2ecf20Sopenharmony_cierror:
2598c2ecf20Sopenharmony_ci	put_io_block(lc);
2608c2ecf20Sopenharmony_ci	return -1;
2618c2ecf20Sopenharmony_ci}
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_cistatic int write_inline_data(struct log_writes_c *lc, void *entry,
2648c2ecf20Sopenharmony_ci			     size_t entrylen, void *data, size_t datalen,
2658c2ecf20Sopenharmony_ci			     sector_t sector)
2668c2ecf20Sopenharmony_ci{
2678c2ecf20Sopenharmony_ci	int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
2688c2ecf20Sopenharmony_ci	struct page *page;
2698c2ecf20Sopenharmony_ci	struct bio *bio;
2708c2ecf20Sopenharmony_ci	size_t ret;
2718c2ecf20Sopenharmony_ci	void *ptr;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	while (datalen) {
2748c2ecf20Sopenharmony_ci		num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
2758c2ecf20Sopenharmony_ci		bio_pages = min(num_pages, BIO_MAX_PAGES);
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci		atomic_inc(&lc->io_blocks);
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci		bio = bio_alloc(GFP_KERNEL, bio_pages);
2808c2ecf20Sopenharmony_ci		if (!bio) {
2818c2ecf20Sopenharmony_ci			DMERR("Couldn't alloc inline data bio");
2828c2ecf20Sopenharmony_ci			goto error;
2838c2ecf20Sopenharmony_ci		}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci		bio->bi_iter.bi_size = 0;
2868c2ecf20Sopenharmony_ci		bio->bi_iter.bi_sector = sector;
2878c2ecf20Sopenharmony_ci		bio_set_dev(bio, lc->logdev->bdev);
2888c2ecf20Sopenharmony_ci		bio->bi_end_io = log_end_io;
2898c2ecf20Sopenharmony_ci		bio->bi_private = lc;
2908c2ecf20Sopenharmony_ci		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci		for (i = 0; i < bio_pages; i++) {
2938c2ecf20Sopenharmony_ci			pg_datalen = min_t(int, datalen, PAGE_SIZE);
2948c2ecf20Sopenharmony_ci			pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize);
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci			page = alloc_page(GFP_KERNEL);
2978c2ecf20Sopenharmony_ci			if (!page) {
2988c2ecf20Sopenharmony_ci				DMERR("Couldn't alloc inline data page");
2998c2ecf20Sopenharmony_ci				goto error_bio;
3008c2ecf20Sopenharmony_ci			}
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci			ptr = kmap_atomic(page);
3038c2ecf20Sopenharmony_ci			memcpy(ptr, data, pg_datalen);
3048c2ecf20Sopenharmony_ci			if (pg_sectorlen > pg_datalen)
3058c2ecf20Sopenharmony_ci				memset(ptr + pg_datalen, 0, pg_sectorlen - pg_datalen);
3068c2ecf20Sopenharmony_ci			kunmap_atomic(ptr);
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci			ret = bio_add_page(bio, page, pg_sectorlen, 0);
3098c2ecf20Sopenharmony_ci			if (ret != pg_sectorlen) {
3108c2ecf20Sopenharmony_ci				DMERR("Couldn't add page of inline data");
3118c2ecf20Sopenharmony_ci				__free_page(page);
3128c2ecf20Sopenharmony_ci				goto error_bio;
3138c2ecf20Sopenharmony_ci			}
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci			datalen -= pg_datalen;
3168c2ecf20Sopenharmony_ci			data	+= pg_datalen;
3178c2ecf20Sopenharmony_ci		}
3188c2ecf20Sopenharmony_ci		submit_bio(bio);
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci		sector += bio_pages * PAGE_SECTORS;
3218c2ecf20Sopenharmony_ci	}
3228c2ecf20Sopenharmony_ci	return 0;
3238c2ecf20Sopenharmony_cierror_bio:
3248c2ecf20Sopenharmony_ci	bio_free_pages(bio);
3258c2ecf20Sopenharmony_ci	bio_put(bio);
3268c2ecf20Sopenharmony_cierror:
3278c2ecf20Sopenharmony_ci	put_io_block(lc);
3288c2ecf20Sopenharmony_ci	return -1;
3298c2ecf20Sopenharmony_ci}
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_cistatic int log_one_block(struct log_writes_c *lc,
3328c2ecf20Sopenharmony_ci			 struct pending_block *block, sector_t sector)
3338c2ecf20Sopenharmony_ci{
3348c2ecf20Sopenharmony_ci	struct bio *bio;
3358c2ecf20Sopenharmony_ci	struct log_write_entry entry;
3368c2ecf20Sopenharmony_ci	size_t metadatalen, ret;
3378c2ecf20Sopenharmony_ci	int i;
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	entry.sector = cpu_to_le64(block->sector);
3408c2ecf20Sopenharmony_ci	entry.nr_sectors = cpu_to_le64(block->nr_sectors);
3418c2ecf20Sopenharmony_ci	entry.flags = cpu_to_le64(block->flags);
3428c2ecf20Sopenharmony_ci	entry.data_len = cpu_to_le64(block->datalen);
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0;
3458c2ecf20Sopenharmony_ci	if (write_metadata(lc, &entry, sizeof(entry), block->data,
3468c2ecf20Sopenharmony_ci			   metadatalen, sector)) {
3478c2ecf20Sopenharmony_ci		free_pending_block(lc, block);
3488c2ecf20Sopenharmony_ci		return -1;
3498c2ecf20Sopenharmony_ci	}
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	sector += dev_to_bio_sectors(lc, 1);
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	if (block->datalen && metadatalen == 0) {
3548c2ecf20Sopenharmony_ci		if (write_inline_data(lc, &entry, sizeof(entry), block->data,
3558c2ecf20Sopenharmony_ci				      block->datalen, sector)) {
3568c2ecf20Sopenharmony_ci			free_pending_block(lc, block);
3578c2ecf20Sopenharmony_ci			return -1;
3588c2ecf20Sopenharmony_ci		}
3598c2ecf20Sopenharmony_ci		/* we don't support both inline data & bio data */
3608c2ecf20Sopenharmony_ci		goto out;
3618c2ecf20Sopenharmony_ci	}
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	if (!block->vec_cnt)
3648c2ecf20Sopenharmony_ci		goto out;
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	atomic_inc(&lc->io_blocks);
3678c2ecf20Sopenharmony_ci	bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
3688c2ecf20Sopenharmony_ci	if (!bio) {
3698c2ecf20Sopenharmony_ci		DMERR("Couldn't alloc log bio");
3708c2ecf20Sopenharmony_ci		goto error;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = 0;
3738c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = sector;
3748c2ecf20Sopenharmony_ci	bio_set_dev(bio, lc->logdev->bdev);
3758c2ecf20Sopenharmony_ci	bio->bi_end_io = log_end_io;
3768c2ecf20Sopenharmony_ci	bio->bi_private = lc;
3778c2ecf20Sopenharmony_ci	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci	for (i = 0; i < block->vec_cnt; i++) {
3808c2ecf20Sopenharmony_ci		/*
3818c2ecf20Sopenharmony_ci		 * The page offset is always 0 because we allocate a new page
3828c2ecf20Sopenharmony_ci		 * for every bvec in the original bio for simplicity sake.
3838c2ecf20Sopenharmony_ci		 */
3848c2ecf20Sopenharmony_ci		ret = bio_add_page(bio, block->vecs[i].bv_page,
3858c2ecf20Sopenharmony_ci				   block->vecs[i].bv_len, 0);
3868c2ecf20Sopenharmony_ci		if (ret != block->vecs[i].bv_len) {
3878c2ecf20Sopenharmony_ci			atomic_inc(&lc->io_blocks);
3888c2ecf20Sopenharmony_ci			submit_bio(bio);
3898c2ecf20Sopenharmony_ci			bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
3908c2ecf20Sopenharmony_ci			if (!bio) {
3918c2ecf20Sopenharmony_ci				DMERR("Couldn't alloc log bio");
3928c2ecf20Sopenharmony_ci				goto error;
3938c2ecf20Sopenharmony_ci			}
3948c2ecf20Sopenharmony_ci			bio->bi_iter.bi_size = 0;
3958c2ecf20Sopenharmony_ci			bio->bi_iter.bi_sector = sector;
3968c2ecf20Sopenharmony_ci			bio_set_dev(bio, lc->logdev->bdev);
3978c2ecf20Sopenharmony_ci			bio->bi_end_io = log_end_io;
3988c2ecf20Sopenharmony_ci			bio->bi_private = lc;
3998c2ecf20Sopenharmony_ci			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci			ret = bio_add_page(bio, block->vecs[i].bv_page,
4028c2ecf20Sopenharmony_ci					   block->vecs[i].bv_len, 0);
4038c2ecf20Sopenharmony_ci			if (ret != block->vecs[i].bv_len) {
4048c2ecf20Sopenharmony_ci				DMERR("Couldn't add page on new bio?");
4058c2ecf20Sopenharmony_ci				bio_put(bio);
4068c2ecf20Sopenharmony_ci				goto error;
4078c2ecf20Sopenharmony_ci			}
4088c2ecf20Sopenharmony_ci		}
4098c2ecf20Sopenharmony_ci		sector += block->vecs[i].bv_len >> SECTOR_SHIFT;
4108c2ecf20Sopenharmony_ci	}
4118c2ecf20Sopenharmony_ci	submit_bio(bio);
4128c2ecf20Sopenharmony_ciout:
4138c2ecf20Sopenharmony_ci	kfree(block->data);
4148c2ecf20Sopenharmony_ci	kfree(block);
4158c2ecf20Sopenharmony_ci	put_pending_block(lc);
4168c2ecf20Sopenharmony_ci	return 0;
4178c2ecf20Sopenharmony_cierror:
4188c2ecf20Sopenharmony_ci	free_pending_block(lc, block);
4198c2ecf20Sopenharmony_ci	put_io_block(lc);
4208c2ecf20Sopenharmony_ci	return -1;
4218c2ecf20Sopenharmony_ci}
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_cistatic int log_super(struct log_writes_c *lc)
4248c2ecf20Sopenharmony_ci{
4258c2ecf20Sopenharmony_ci	struct log_write_super super;
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	super.magic = cpu_to_le64(WRITE_LOG_MAGIC);
4288c2ecf20Sopenharmony_ci	super.version = cpu_to_le64(WRITE_LOG_VERSION);
4298c2ecf20Sopenharmony_ci	super.nr_entries = cpu_to_le64(lc->logged_entries);
4308c2ecf20Sopenharmony_ci	super.sectorsize = cpu_to_le32(lc->sectorsize);
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	if (write_metadata(lc, &super, sizeof(super), NULL, 0,
4338c2ecf20Sopenharmony_ci			   WRITE_LOG_SUPER_SECTOR)) {
4348c2ecf20Sopenharmony_ci		DMERR("Couldn't write super");
4358c2ecf20Sopenharmony_ci		return -1;
4368c2ecf20Sopenharmony_ci	}
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	/*
4398c2ecf20Sopenharmony_ci	 * Super sector should be writen in-order, otherwise the
4408c2ecf20Sopenharmony_ci	 * nr_entries could be rewritten incorrectly by an old bio.
4418c2ecf20Sopenharmony_ci	 */
4428c2ecf20Sopenharmony_ci	wait_for_completion_io(&lc->super_done);
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	return 0;
4458c2ecf20Sopenharmony_ci}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_cistatic inline sector_t logdev_last_sector(struct log_writes_c *lc)
4488c2ecf20Sopenharmony_ci{
4498c2ecf20Sopenharmony_ci	return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT;
4508c2ecf20Sopenharmony_ci}
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_cistatic int log_writes_kthread(void *arg)
4538c2ecf20Sopenharmony_ci{
4548c2ecf20Sopenharmony_ci	struct log_writes_c *lc = (struct log_writes_c *)arg;
4558c2ecf20Sopenharmony_ci	sector_t sector = 0;
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	while (!kthread_should_stop()) {
4588c2ecf20Sopenharmony_ci		bool super = false;
4598c2ecf20Sopenharmony_ci		bool logging_enabled;
4608c2ecf20Sopenharmony_ci		struct pending_block *block = NULL;
4618c2ecf20Sopenharmony_ci		int ret;
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci		spin_lock_irq(&lc->blocks_lock);
4648c2ecf20Sopenharmony_ci		if (!list_empty(&lc->logging_blocks)) {
4658c2ecf20Sopenharmony_ci			block = list_first_entry(&lc->logging_blocks,
4668c2ecf20Sopenharmony_ci						 struct pending_block, list);
4678c2ecf20Sopenharmony_ci			list_del_init(&block->list);
4688c2ecf20Sopenharmony_ci			if (!lc->logging_enabled)
4698c2ecf20Sopenharmony_ci				goto next;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci			sector = lc->next_sector;
4728c2ecf20Sopenharmony_ci			if (!(block->flags & LOG_DISCARD_FLAG))
4738c2ecf20Sopenharmony_ci				lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors);
4748c2ecf20Sopenharmony_ci			lc->next_sector += dev_to_bio_sectors(lc, 1);
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci			/*
4778c2ecf20Sopenharmony_ci			 * Apparently the size of the device may not be known
4788c2ecf20Sopenharmony_ci			 * right away, so handle this properly.
4798c2ecf20Sopenharmony_ci			 */
4808c2ecf20Sopenharmony_ci			if (!lc->end_sector)
4818c2ecf20Sopenharmony_ci				lc->end_sector = logdev_last_sector(lc);
4828c2ecf20Sopenharmony_ci			if (lc->end_sector &&
4838c2ecf20Sopenharmony_ci			    lc->next_sector >= lc->end_sector) {
4848c2ecf20Sopenharmony_ci				DMERR("Ran out of space on the logdev");
4858c2ecf20Sopenharmony_ci				lc->logging_enabled = false;
4868c2ecf20Sopenharmony_ci				goto next;
4878c2ecf20Sopenharmony_ci			}
4888c2ecf20Sopenharmony_ci			lc->logged_entries++;
4898c2ecf20Sopenharmony_ci			atomic_inc(&lc->io_blocks);
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci			super = (block->flags & (LOG_FUA_FLAG | LOG_MARK_FLAG));
4928c2ecf20Sopenharmony_ci			if (super)
4938c2ecf20Sopenharmony_ci				atomic_inc(&lc->io_blocks);
4948c2ecf20Sopenharmony_ci		}
4958c2ecf20Sopenharmony_cinext:
4968c2ecf20Sopenharmony_ci		logging_enabled = lc->logging_enabled;
4978c2ecf20Sopenharmony_ci		spin_unlock_irq(&lc->blocks_lock);
4988c2ecf20Sopenharmony_ci		if (block) {
4998c2ecf20Sopenharmony_ci			if (logging_enabled) {
5008c2ecf20Sopenharmony_ci				ret = log_one_block(lc, block, sector);
5018c2ecf20Sopenharmony_ci				if (!ret && super)
5028c2ecf20Sopenharmony_ci					ret = log_super(lc);
5038c2ecf20Sopenharmony_ci				if (ret) {
5048c2ecf20Sopenharmony_ci					spin_lock_irq(&lc->blocks_lock);
5058c2ecf20Sopenharmony_ci					lc->logging_enabled = false;
5068c2ecf20Sopenharmony_ci					spin_unlock_irq(&lc->blocks_lock);
5078c2ecf20Sopenharmony_ci				}
5088c2ecf20Sopenharmony_ci			} else
5098c2ecf20Sopenharmony_ci				free_pending_block(lc, block);
5108c2ecf20Sopenharmony_ci			continue;
5118c2ecf20Sopenharmony_ci		}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci		if (!try_to_freeze()) {
5148c2ecf20Sopenharmony_ci			set_current_state(TASK_INTERRUPTIBLE);
5158c2ecf20Sopenharmony_ci			if (!kthread_should_stop() &&
5168c2ecf20Sopenharmony_ci			    list_empty(&lc->logging_blocks))
5178c2ecf20Sopenharmony_ci				schedule();
5188c2ecf20Sopenharmony_ci			__set_current_state(TASK_RUNNING);
5198c2ecf20Sopenharmony_ci		}
5208c2ecf20Sopenharmony_ci	}
5218c2ecf20Sopenharmony_ci	return 0;
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci/*
5258c2ecf20Sopenharmony_ci * Construct a log-writes mapping:
5268c2ecf20Sopenharmony_ci * log-writes <dev_path> <log_dev_path>
5278c2ecf20Sopenharmony_ci */
5288c2ecf20Sopenharmony_cistatic int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	struct log_writes_c *lc;
5318c2ecf20Sopenharmony_ci	struct dm_arg_set as;
5328c2ecf20Sopenharmony_ci	const char *devname, *logdevname;
5338c2ecf20Sopenharmony_ci	int ret;
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci	as.argc = argc;
5368c2ecf20Sopenharmony_ci	as.argv = argv;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	if (argc < 2) {
5398c2ecf20Sopenharmony_ci		ti->error = "Invalid argument count";
5408c2ecf20Sopenharmony_ci		return -EINVAL;
5418c2ecf20Sopenharmony_ci	}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	lc = kzalloc(sizeof(struct log_writes_c), GFP_KERNEL);
5448c2ecf20Sopenharmony_ci	if (!lc) {
5458c2ecf20Sopenharmony_ci		ti->error = "Cannot allocate context";
5468c2ecf20Sopenharmony_ci		return -ENOMEM;
5478c2ecf20Sopenharmony_ci	}
5488c2ecf20Sopenharmony_ci	spin_lock_init(&lc->blocks_lock);
5498c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lc->unflushed_blocks);
5508c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lc->logging_blocks);
5518c2ecf20Sopenharmony_ci	init_waitqueue_head(&lc->wait);
5528c2ecf20Sopenharmony_ci	init_completion(&lc->super_done);
5538c2ecf20Sopenharmony_ci	atomic_set(&lc->io_blocks, 0);
5548c2ecf20Sopenharmony_ci	atomic_set(&lc->pending_blocks, 0);
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	devname = dm_shift_arg(&as);
5578c2ecf20Sopenharmony_ci	ret = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev);
5588c2ecf20Sopenharmony_ci	if (ret) {
5598c2ecf20Sopenharmony_ci		ti->error = "Device lookup failed";
5608c2ecf20Sopenharmony_ci		goto bad;
5618c2ecf20Sopenharmony_ci	}
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	logdevname = dm_shift_arg(&as);
5648c2ecf20Sopenharmony_ci	ret = dm_get_device(ti, logdevname, dm_table_get_mode(ti->table),
5658c2ecf20Sopenharmony_ci			    &lc->logdev);
5668c2ecf20Sopenharmony_ci	if (ret) {
5678c2ecf20Sopenharmony_ci		ti->error = "Log device lookup failed";
5688c2ecf20Sopenharmony_ci		dm_put_device(ti, lc->dev);
5698c2ecf20Sopenharmony_ci		goto bad;
5708c2ecf20Sopenharmony_ci	}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	lc->sectorsize = bdev_logical_block_size(lc->dev->bdev);
5738c2ecf20Sopenharmony_ci	lc->sectorshift = ilog2(lc->sectorsize);
5748c2ecf20Sopenharmony_ci	lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
5758c2ecf20Sopenharmony_ci	if (IS_ERR(lc->log_kthread)) {
5768c2ecf20Sopenharmony_ci		ret = PTR_ERR(lc->log_kthread);
5778c2ecf20Sopenharmony_ci		ti->error = "Couldn't alloc kthread";
5788c2ecf20Sopenharmony_ci		dm_put_device(ti, lc->dev);
5798c2ecf20Sopenharmony_ci		dm_put_device(ti, lc->logdev);
5808c2ecf20Sopenharmony_ci		goto bad;
5818c2ecf20Sopenharmony_ci	}
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	/*
5848c2ecf20Sopenharmony_ci	 * next_sector is in 512b sectors to correspond to what bi_sector expects.
5858c2ecf20Sopenharmony_ci	 * The super starts at sector 0, and the next_sector is the next logical
5868c2ecf20Sopenharmony_ci	 * one based on the sectorsize of the device.
5878c2ecf20Sopenharmony_ci	 */
5888c2ecf20Sopenharmony_ci	lc->next_sector = lc->sectorsize >> SECTOR_SHIFT;
5898c2ecf20Sopenharmony_ci	lc->logging_enabled = true;
5908c2ecf20Sopenharmony_ci	lc->end_sector = logdev_last_sector(lc);
5918c2ecf20Sopenharmony_ci	lc->device_supports_discard = true;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	ti->num_flush_bios = 1;
5948c2ecf20Sopenharmony_ci	ti->flush_supported = true;
5958c2ecf20Sopenharmony_ci	ti->num_discard_bios = 1;
5968c2ecf20Sopenharmony_ci	ti->discards_supported = true;
5978c2ecf20Sopenharmony_ci	ti->per_io_data_size = sizeof(struct per_bio_data);
5988c2ecf20Sopenharmony_ci	ti->private = lc;
5998c2ecf20Sopenharmony_ci	return 0;
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_cibad:
6028c2ecf20Sopenharmony_ci	kfree(lc);
6038c2ecf20Sopenharmony_ci	return ret;
6048c2ecf20Sopenharmony_ci}
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_cistatic int log_mark(struct log_writes_c *lc, char *data)
6078c2ecf20Sopenharmony_ci{
6088c2ecf20Sopenharmony_ci	struct pending_block *block;
6098c2ecf20Sopenharmony_ci	size_t maxsize = lc->sectorsize - sizeof(struct log_write_entry);
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
6128c2ecf20Sopenharmony_ci	if (!block) {
6138c2ecf20Sopenharmony_ci		DMERR("Error allocating pending block");
6148c2ecf20Sopenharmony_ci		return -ENOMEM;
6158c2ecf20Sopenharmony_ci	}
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci	block->data = kstrndup(data, maxsize - 1, GFP_KERNEL);
6188c2ecf20Sopenharmony_ci	if (!block->data) {
6198c2ecf20Sopenharmony_ci		DMERR("Error copying mark data");
6208c2ecf20Sopenharmony_ci		kfree(block);
6218c2ecf20Sopenharmony_ci		return -ENOMEM;
6228c2ecf20Sopenharmony_ci	}
6238c2ecf20Sopenharmony_ci	atomic_inc(&lc->pending_blocks);
6248c2ecf20Sopenharmony_ci	block->datalen = strlen(block->data);
6258c2ecf20Sopenharmony_ci	block->flags |= LOG_MARK_FLAG;
6268c2ecf20Sopenharmony_ci	spin_lock_irq(&lc->blocks_lock);
6278c2ecf20Sopenharmony_ci	list_add_tail(&block->list, &lc->logging_blocks);
6288c2ecf20Sopenharmony_ci	spin_unlock_irq(&lc->blocks_lock);
6298c2ecf20Sopenharmony_ci	wake_up_process(lc->log_kthread);
6308c2ecf20Sopenharmony_ci	return 0;
6318c2ecf20Sopenharmony_ci}
6328c2ecf20Sopenharmony_ci
6338c2ecf20Sopenharmony_cistatic void log_writes_dtr(struct dm_target *ti)
6348c2ecf20Sopenharmony_ci{
6358c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
6368c2ecf20Sopenharmony_ci
6378c2ecf20Sopenharmony_ci	spin_lock_irq(&lc->blocks_lock);
6388c2ecf20Sopenharmony_ci	list_splice_init(&lc->unflushed_blocks, &lc->logging_blocks);
6398c2ecf20Sopenharmony_ci	spin_unlock_irq(&lc->blocks_lock);
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_ci	/*
6428c2ecf20Sopenharmony_ci	 * This is just nice to have since it'll update the super to include the
6438c2ecf20Sopenharmony_ci	 * unflushed blocks, if it fails we don't really care.
6448c2ecf20Sopenharmony_ci	 */
6458c2ecf20Sopenharmony_ci	log_mark(lc, "dm-log-writes-end");
6468c2ecf20Sopenharmony_ci	wake_up_process(lc->log_kthread);
6478c2ecf20Sopenharmony_ci	wait_event(lc->wait, !atomic_read(&lc->io_blocks) &&
6488c2ecf20Sopenharmony_ci		   !atomic_read(&lc->pending_blocks));
6498c2ecf20Sopenharmony_ci	kthread_stop(lc->log_kthread);
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&lc->logging_blocks));
6528c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&lc->unflushed_blocks));
6538c2ecf20Sopenharmony_ci	dm_put_device(ti, lc->dev);
6548c2ecf20Sopenharmony_ci	dm_put_device(ti, lc->logdev);
6558c2ecf20Sopenharmony_ci	kfree(lc);
6568c2ecf20Sopenharmony_ci}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_cistatic void normal_map_bio(struct dm_target *ti, struct bio *bio)
6598c2ecf20Sopenharmony_ci{
6608c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	bio_set_dev(bio, lc->dev->bdev);
6638c2ecf20Sopenharmony_ci}
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_cistatic int log_writes_map(struct dm_target *ti, struct bio *bio)
6668c2ecf20Sopenharmony_ci{
6678c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
6688c2ecf20Sopenharmony_ci	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
6698c2ecf20Sopenharmony_ci	struct pending_block *block;
6708c2ecf20Sopenharmony_ci	struct bvec_iter iter;
6718c2ecf20Sopenharmony_ci	struct bio_vec bv;
6728c2ecf20Sopenharmony_ci	size_t alloc_size;
6738c2ecf20Sopenharmony_ci	int i = 0;
6748c2ecf20Sopenharmony_ci	bool flush_bio = (bio->bi_opf & REQ_PREFLUSH);
6758c2ecf20Sopenharmony_ci	bool fua_bio = (bio->bi_opf & REQ_FUA);
6768c2ecf20Sopenharmony_ci	bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
6778c2ecf20Sopenharmony_ci	bool meta_bio = (bio->bi_opf & REQ_META);
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	pb->block = NULL;
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci	/* Don't bother doing anything if logging has been disabled */
6828c2ecf20Sopenharmony_ci	if (!lc->logging_enabled)
6838c2ecf20Sopenharmony_ci		goto map_bio;
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci	/*
6868c2ecf20Sopenharmony_ci	 * Map reads as normal.
6878c2ecf20Sopenharmony_ci	 */
6888c2ecf20Sopenharmony_ci	if (bio_data_dir(bio) == READ)
6898c2ecf20Sopenharmony_ci		goto map_bio;
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	/* No sectors and not a flush?  Don't care */
6928c2ecf20Sopenharmony_ci	if (!bio_sectors(bio) && !flush_bio)
6938c2ecf20Sopenharmony_ci		goto map_bio;
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	/*
6968c2ecf20Sopenharmony_ci	 * Discards will have bi_size set but there's no actual data, so just
6978c2ecf20Sopenharmony_ci	 * allocate the size of the pending block.
6988c2ecf20Sopenharmony_ci	 */
6998c2ecf20Sopenharmony_ci	if (discard_bio)
7008c2ecf20Sopenharmony_ci		alloc_size = sizeof(struct pending_block);
7018c2ecf20Sopenharmony_ci	else
7028c2ecf20Sopenharmony_ci		alloc_size = struct_size(block, vecs, bio_segments(bio));
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	block = kzalloc(alloc_size, GFP_NOIO);
7058c2ecf20Sopenharmony_ci	if (!block) {
7068c2ecf20Sopenharmony_ci		DMERR("Error allocating pending block");
7078c2ecf20Sopenharmony_ci		spin_lock_irq(&lc->blocks_lock);
7088c2ecf20Sopenharmony_ci		lc->logging_enabled = false;
7098c2ecf20Sopenharmony_ci		spin_unlock_irq(&lc->blocks_lock);
7108c2ecf20Sopenharmony_ci		return DM_MAPIO_KILL;
7118c2ecf20Sopenharmony_ci	}
7128c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&block->list);
7138c2ecf20Sopenharmony_ci	pb->block = block;
7148c2ecf20Sopenharmony_ci	atomic_inc(&lc->pending_blocks);
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	if (flush_bio)
7178c2ecf20Sopenharmony_ci		block->flags |= LOG_FLUSH_FLAG;
7188c2ecf20Sopenharmony_ci	if (fua_bio)
7198c2ecf20Sopenharmony_ci		block->flags |= LOG_FUA_FLAG;
7208c2ecf20Sopenharmony_ci	if (discard_bio)
7218c2ecf20Sopenharmony_ci		block->flags |= LOG_DISCARD_FLAG;
7228c2ecf20Sopenharmony_ci	if (meta_bio)
7238c2ecf20Sopenharmony_ci		block->flags |= LOG_METADATA_FLAG;
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_ci	block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector);
7268c2ecf20Sopenharmony_ci	block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio));
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci	/* We don't need the data, just submit */
7298c2ecf20Sopenharmony_ci	if (discard_bio) {
7308c2ecf20Sopenharmony_ci		WARN_ON(flush_bio || fua_bio);
7318c2ecf20Sopenharmony_ci		if (lc->device_supports_discard)
7328c2ecf20Sopenharmony_ci			goto map_bio;
7338c2ecf20Sopenharmony_ci		bio_endio(bio);
7348c2ecf20Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
7358c2ecf20Sopenharmony_ci	}
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci	/* Flush bio, splice the unflushed blocks onto this list and submit */
7388c2ecf20Sopenharmony_ci	if (flush_bio && !bio_sectors(bio)) {
7398c2ecf20Sopenharmony_ci		spin_lock_irq(&lc->blocks_lock);
7408c2ecf20Sopenharmony_ci		list_splice_init(&lc->unflushed_blocks, &block->list);
7418c2ecf20Sopenharmony_ci		spin_unlock_irq(&lc->blocks_lock);
7428c2ecf20Sopenharmony_ci		goto map_bio;
7438c2ecf20Sopenharmony_ci	}
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	/*
7468c2ecf20Sopenharmony_ci	 * We will write this bio somewhere else way later so we need to copy
7478c2ecf20Sopenharmony_ci	 * the actual contents into new pages so we know the data will always be
7488c2ecf20Sopenharmony_ci	 * there.
7498c2ecf20Sopenharmony_ci	 *
7508c2ecf20Sopenharmony_ci	 * We do this because this could be a bio from O_DIRECT in which case we
7518c2ecf20Sopenharmony_ci	 * can't just hold onto the page until some later point, we have to
7528c2ecf20Sopenharmony_ci	 * manually copy the contents.
7538c2ecf20Sopenharmony_ci	 */
7548c2ecf20Sopenharmony_ci	bio_for_each_segment(bv, bio, iter) {
7558c2ecf20Sopenharmony_ci		struct page *page;
7568c2ecf20Sopenharmony_ci		void *src, *dst;
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_ci		page = alloc_page(GFP_NOIO);
7598c2ecf20Sopenharmony_ci		if (!page) {
7608c2ecf20Sopenharmony_ci			DMERR("Error allocing page");
7618c2ecf20Sopenharmony_ci			free_pending_block(lc, block);
7628c2ecf20Sopenharmony_ci			spin_lock_irq(&lc->blocks_lock);
7638c2ecf20Sopenharmony_ci			lc->logging_enabled = false;
7648c2ecf20Sopenharmony_ci			spin_unlock_irq(&lc->blocks_lock);
7658c2ecf20Sopenharmony_ci			return DM_MAPIO_KILL;
7668c2ecf20Sopenharmony_ci		}
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci		src = kmap_atomic(bv.bv_page);
7698c2ecf20Sopenharmony_ci		dst = kmap_atomic(page);
7708c2ecf20Sopenharmony_ci		memcpy(dst, src + bv.bv_offset, bv.bv_len);
7718c2ecf20Sopenharmony_ci		kunmap_atomic(dst);
7728c2ecf20Sopenharmony_ci		kunmap_atomic(src);
7738c2ecf20Sopenharmony_ci		block->vecs[i].bv_page = page;
7748c2ecf20Sopenharmony_ci		block->vecs[i].bv_len = bv.bv_len;
7758c2ecf20Sopenharmony_ci		block->vec_cnt++;
7768c2ecf20Sopenharmony_ci		i++;
7778c2ecf20Sopenharmony_ci	}
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_ci	/* Had a flush with data in it, weird */
7808c2ecf20Sopenharmony_ci	if (flush_bio) {
7818c2ecf20Sopenharmony_ci		spin_lock_irq(&lc->blocks_lock);
7828c2ecf20Sopenharmony_ci		list_splice_init(&lc->unflushed_blocks, &block->list);
7838c2ecf20Sopenharmony_ci		spin_unlock_irq(&lc->blocks_lock);
7848c2ecf20Sopenharmony_ci	}
7858c2ecf20Sopenharmony_cimap_bio:
7868c2ecf20Sopenharmony_ci	normal_map_bio(ti, bio);
7878c2ecf20Sopenharmony_ci	return DM_MAPIO_REMAPPED;
7888c2ecf20Sopenharmony_ci}
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_cistatic int normal_end_io(struct dm_target *ti, struct bio *bio,
7918c2ecf20Sopenharmony_ci		blk_status_t *error)
7928c2ecf20Sopenharmony_ci{
7938c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
7948c2ecf20Sopenharmony_ci	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	if (bio_data_dir(bio) == WRITE && pb->block) {
7978c2ecf20Sopenharmony_ci		struct pending_block *block = pb->block;
7988c2ecf20Sopenharmony_ci		unsigned long flags;
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_ci		spin_lock_irqsave(&lc->blocks_lock, flags);
8018c2ecf20Sopenharmony_ci		if (block->flags & LOG_FLUSH_FLAG) {
8028c2ecf20Sopenharmony_ci			list_splice_tail_init(&block->list, &lc->logging_blocks);
8038c2ecf20Sopenharmony_ci			list_add_tail(&block->list, &lc->logging_blocks);
8048c2ecf20Sopenharmony_ci			wake_up_process(lc->log_kthread);
8058c2ecf20Sopenharmony_ci		} else if (block->flags & LOG_FUA_FLAG) {
8068c2ecf20Sopenharmony_ci			list_add_tail(&block->list, &lc->logging_blocks);
8078c2ecf20Sopenharmony_ci			wake_up_process(lc->log_kthread);
8088c2ecf20Sopenharmony_ci		} else
8098c2ecf20Sopenharmony_ci			list_add_tail(&block->list, &lc->unflushed_blocks);
8108c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lc->blocks_lock, flags);
8118c2ecf20Sopenharmony_ci	}
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	return DM_ENDIO_DONE;
8148c2ecf20Sopenharmony_ci}
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ci/*
8178c2ecf20Sopenharmony_ci * INFO format: <logged entries> <highest allocated sector>
8188c2ecf20Sopenharmony_ci */
8198c2ecf20Sopenharmony_cistatic void log_writes_status(struct dm_target *ti, status_type_t type,
8208c2ecf20Sopenharmony_ci			      unsigned status_flags, char *result,
8218c2ecf20Sopenharmony_ci			      unsigned maxlen)
8228c2ecf20Sopenharmony_ci{
8238c2ecf20Sopenharmony_ci	unsigned sz = 0;
8248c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
8258c2ecf20Sopenharmony_ci
8268c2ecf20Sopenharmony_ci	switch (type) {
8278c2ecf20Sopenharmony_ci	case STATUSTYPE_INFO:
8288c2ecf20Sopenharmony_ci		DMEMIT("%llu %llu", lc->logged_entries,
8298c2ecf20Sopenharmony_ci		       (unsigned long long)lc->next_sector - 1);
8308c2ecf20Sopenharmony_ci		if (!lc->logging_enabled)
8318c2ecf20Sopenharmony_ci			DMEMIT(" logging_disabled");
8328c2ecf20Sopenharmony_ci		break;
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci	case STATUSTYPE_TABLE:
8358c2ecf20Sopenharmony_ci		DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
8368c2ecf20Sopenharmony_ci		break;
8378c2ecf20Sopenharmony_ci	}
8388c2ecf20Sopenharmony_ci}
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_cistatic int log_writes_prepare_ioctl(struct dm_target *ti,
8418c2ecf20Sopenharmony_ci				    struct block_device **bdev)
8428c2ecf20Sopenharmony_ci{
8438c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
8448c2ecf20Sopenharmony_ci	struct dm_dev *dev = lc->dev;
8458c2ecf20Sopenharmony_ci
8468c2ecf20Sopenharmony_ci	*bdev = dev->bdev;
8478c2ecf20Sopenharmony_ci	/*
8488c2ecf20Sopenharmony_ci	 * Only pass ioctls through if the device sizes match exactly.
8498c2ecf20Sopenharmony_ci	 */
8508c2ecf20Sopenharmony_ci	if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
8518c2ecf20Sopenharmony_ci		return 1;
8528c2ecf20Sopenharmony_ci	return 0;
8538c2ecf20Sopenharmony_ci}
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_cistatic int log_writes_iterate_devices(struct dm_target *ti,
8568c2ecf20Sopenharmony_ci				      iterate_devices_callout_fn fn,
8578c2ecf20Sopenharmony_ci				      void *data)
8588c2ecf20Sopenharmony_ci{
8598c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	return fn(ti, lc->dev, 0, ti->len, data);
8628c2ecf20Sopenharmony_ci}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci/*
8658c2ecf20Sopenharmony_ci * Messages supported:
8668c2ecf20Sopenharmony_ci *   mark <mark data> - specify the marked data.
8678c2ecf20Sopenharmony_ci */
8688c2ecf20Sopenharmony_cistatic int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
8698c2ecf20Sopenharmony_ci			      char *result, unsigned maxlen)
8708c2ecf20Sopenharmony_ci{
8718c2ecf20Sopenharmony_ci	int r = -EINVAL;
8728c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci	if (argc != 2) {
8758c2ecf20Sopenharmony_ci		DMWARN("Invalid log-writes message arguments, expect 2 arguments, got %d", argc);
8768c2ecf20Sopenharmony_ci		return r;
8778c2ecf20Sopenharmony_ci	}
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "mark"))
8808c2ecf20Sopenharmony_ci		r = log_mark(lc, argv[1]);
8818c2ecf20Sopenharmony_ci	else
8828c2ecf20Sopenharmony_ci		DMWARN("Unrecognised log writes target message received: %s", argv[0]);
8838c2ecf20Sopenharmony_ci
8848c2ecf20Sopenharmony_ci	return r;
8858c2ecf20Sopenharmony_ci}
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_cistatic void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
8888c2ecf20Sopenharmony_ci{
8898c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
8908c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(lc->dev->bdev);
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	if (!q || !blk_queue_discard(q)) {
8938c2ecf20Sopenharmony_ci		lc->device_supports_discard = false;
8948c2ecf20Sopenharmony_ci		limits->discard_granularity = lc->sectorsize;
8958c2ecf20Sopenharmony_ci		limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
8968c2ecf20Sopenharmony_ci	}
8978c2ecf20Sopenharmony_ci	limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev);
8988c2ecf20Sopenharmony_ci	limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev);
8998c2ecf20Sopenharmony_ci	limits->io_min = limits->physical_block_size;
9008c2ecf20Sopenharmony_ci}
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_DAX_DRIVER)
9038c2ecf20Sopenharmony_cistatic int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
9048c2ecf20Sopenharmony_ci		   struct iov_iter *i)
9058c2ecf20Sopenharmony_ci{
9068c2ecf20Sopenharmony_ci	struct pending_block *block;
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci	if (!bytes)
9098c2ecf20Sopenharmony_ci		return 0;
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
9128c2ecf20Sopenharmony_ci	if (!block) {
9138c2ecf20Sopenharmony_ci		DMERR("Error allocating dax pending block");
9148c2ecf20Sopenharmony_ci		return -ENOMEM;
9158c2ecf20Sopenharmony_ci	}
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	block->data = kzalloc(bytes, GFP_KERNEL);
9188c2ecf20Sopenharmony_ci	if (!block->data) {
9198c2ecf20Sopenharmony_ci		DMERR("Error allocating dax data space");
9208c2ecf20Sopenharmony_ci		kfree(block);
9218c2ecf20Sopenharmony_ci		return -ENOMEM;
9228c2ecf20Sopenharmony_ci	}
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	/* write data provided via the iterator */
9258c2ecf20Sopenharmony_ci	if (!copy_from_iter(block->data, bytes, i)) {
9268c2ecf20Sopenharmony_ci		DMERR("Error copying dax data");
9278c2ecf20Sopenharmony_ci		kfree(block->data);
9288c2ecf20Sopenharmony_ci		kfree(block);
9298c2ecf20Sopenharmony_ci		return -EIO;
9308c2ecf20Sopenharmony_ci	}
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci	/* rewind the iterator so that the block driver can use it */
9338c2ecf20Sopenharmony_ci	iov_iter_revert(i, bytes);
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci	block->datalen = bytes;
9368c2ecf20Sopenharmony_ci	block->sector = bio_to_dev_sectors(lc, sector);
9378c2ecf20Sopenharmony_ci	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	atomic_inc(&lc->pending_blocks);
9408c2ecf20Sopenharmony_ci	spin_lock_irq(&lc->blocks_lock);
9418c2ecf20Sopenharmony_ci	list_add_tail(&block->list, &lc->unflushed_blocks);
9428c2ecf20Sopenharmony_ci	spin_unlock_irq(&lc->blocks_lock);
9438c2ecf20Sopenharmony_ci	wake_up_process(lc->log_kthread);
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci	return 0;
9468c2ecf20Sopenharmony_ci}
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_cistatic long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
9498c2ecf20Sopenharmony_ci					 long nr_pages, void **kaddr, pfn_t *pfn)
9508c2ecf20Sopenharmony_ci{
9518c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
9528c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
9538c2ecf20Sopenharmony_ci	int ret;
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci	ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
9568c2ecf20Sopenharmony_ci	if (ret)
9578c2ecf20Sopenharmony_ci		return ret;
9588c2ecf20Sopenharmony_ci	return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
9598c2ecf20Sopenharmony_ci}
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_cistatic size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
9628c2ecf20Sopenharmony_ci					    pgoff_t pgoff, void *addr, size_t bytes,
9638c2ecf20Sopenharmony_ci					    struct iov_iter *i)
9648c2ecf20Sopenharmony_ci{
9658c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
9668c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
9678c2ecf20Sopenharmony_ci	int err;
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_ci	if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
9708c2ecf20Sopenharmony_ci		return 0;
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	/* Don't bother doing anything if logging has been disabled */
9738c2ecf20Sopenharmony_ci	if (!lc->logging_enabled)
9748c2ecf20Sopenharmony_ci		goto dax_copy;
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci	err = log_dax(lc, sector, bytes, i);
9778c2ecf20Sopenharmony_ci	if (err) {
9788c2ecf20Sopenharmony_ci		DMWARN("Error %d logging DAX write", err);
9798c2ecf20Sopenharmony_ci		return 0;
9808c2ecf20Sopenharmony_ci	}
9818c2ecf20Sopenharmony_cidax_copy:
9828c2ecf20Sopenharmony_ci	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
9838c2ecf20Sopenharmony_ci}
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_cistatic size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
9868c2ecf20Sopenharmony_ci					  pgoff_t pgoff, void *addr, size_t bytes,
9878c2ecf20Sopenharmony_ci					  struct iov_iter *i)
9888c2ecf20Sopenharmony_ci{
9898c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
9908c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci	if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
9938c2ecf20Sopenharmony_ci		return 0;
9948c2ecf20Sopenharmony_ci	return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
9958c2ecf20Sopenharmony_ci}
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_cistatic int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
9988c2ecf20Sopenharmony_ci					  size_t nr_pages)
9998c2ecf20Sopenharmony_ci{
10008c2ecf20Sopenharmony_ci	int ret;
10018c2ecf20Sopenharmony_ci	struct log_writes_c *lc = ti->private;
10028c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
10038c2ecf20Sopenharmony_ci
10048c2ecf20Sopenharmony_ci	ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
10058c2ecf20Sopenharmony_ci			     &pgoff);
10068c2ecf20Sopenharmony_ci	if (ret)
10078c2ecf20Sopenharmony_ci		return ret;
10088c2ecf20Sopenharmony_ci	return dax_zero_page_range(lc->dev->dax_dev, pgoff,
10098c2ecf20Sopenharmony_ci				   nr_pages << PAGE_SHIFT);
10108c2ecf20Sopenharmony_ci}
10118c2ecf20Sopenharmony_ci
10128c2ecf20Sopenharmony_ci#else
/* Without CONFIG_DAX_DRIVER the target registers no DAX hooks. */
#define log_writes_dax_direct_access	NULL
#define log_writes_dax_copy_from_iter	NULL
#define log_writes_dax_copy_to_iter	NULL
#define log_writes_dax_zero_page_range	NULL
10178c2ecf20Sopenharmony_ci#endif
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_cistatic struct target_type log_writes_target = {
10208c2ecf20Sopenharmony_ci	.name   = "log-writes",
10218c2ecf20Sopenharmony_ci	.version = {1, 1, 0},
10228c2ecf20Sopenharmony_ci	.module = THIS_MODULE,
10238c2ecf20Sopenharmony_ci	.ctr    = log_writes_ctr,
10248c2ecf20Sopenharmony_ci	.dtr    = log_writes_dtr,
10258c2ecf20Sopenharmony_ci	.map    = log_writes_map,
10268c2ecf20Sopenharmony_ci	.end_io = normal_end_io,
10278c2ecf20Sopenharmony_ci	.status = log_writes_status,
10288c2ecf20Sopenharmony_ci	.prepare_ioctl = log_writes_prepare_ioctl,
10298c2ecf20Sopenharmony_ci	.message = log_writes_message,
10308c2ecf20Sopenharmony_ci	.iterate_devices = log_writes_iterate_devices,
10318c2ecf20Sopenharmony_ci	.io_hints = log_writes_io_hints,
10328c2ecf20Sopenharmony_ci	.direct_access = log_writes_dax_direct_access,
10338c2ecf20Sopenharmony_ci	.dax_copy_from_iter = log_writes_dax_copy_from_iter,
10348c2ecf20Sopenharmony_ci	.dax_copy_to_iter = log_writes_dax_copy_to_iter,
10358c2ecf20Sopenharmony_ci	.dax_zero_page_range = log_writes_dax_zero_page_range,
10368c2ecf20Sopenharmony_ci};
10378c2ecf20Sopenharmony_ci
10388c2ecf20Sopenharmony_cistatic int __init dm_log_writes_init(void)
10398c2ecf20Sopenharmony_ci{
10408c2ecf20Sopenharmony_ci	int r = dm_register_target(&log_writes_target);
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci	if (r < 0)
10438c2ecf20Sopenharmony_ci		DMERR("register failed %d", r);
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_ci	return r;
10468c2ecf20Sopenharmony_ci}
10478c2ecf20Sopenharmony_ci
10488c2ecf20Sopenharmony_cistatic void __exit dm_log_writes_exit(void)
10498c2ecf20Sopenharmony_ci{
10508c2ecf20Sopenharmony_ci	dm_unregister_target(&log_writes_target);
10518c2ecf20Sopenharmony_ci}
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_cimodule_init(dm_log_writes_init);
10548c2ecf20Sopenharmony_cimodule_exit(dm_log_writes_exit);
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " log writes target");
10578c2ecf20Sopenharmony_ciMODULE_AUTHOR("Josef Bacik <jbacik@fb.com>");
10588c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
1059