// SPDX-License-Identifier: GPL-2.0-only
/*
 * Partial Parity Log for closing the RAID5 write hole
 * Copyright (c) 2017, Intel Corporation.
 */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/kernel.h>
862306a36Sopenharmony_ci#include <linux/blkdev.h>
962306a36Sopenharmony_ci#include <linux/slab.h>
1062306a36Sopenharmony_ci#include <linux/crc32c.h>
1162306a36Sopenharmony_ci#include <linux/async_tx.h>
1262306a36Sopenharmony_ci#include <linux/raid/md_p.h>
1362306a36Sopenharmony_ci#include "md.h"
1462306a36Sopenharmony_ci#include "raid5.h"
1562306a36Sopenharmony_ci#include "raid5-log.h"
1662306a36Sopenharmony_ci
/*
 * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for
 * partial parity data. The header contains an array of entries
 * (struct ppl_header_entry) which describe the logged write requests.
 * Partial parity for the entries comes after the header, written in the same
 * sequence as the entries:
 *
 * Header
 *   entry0
 *   ...
 *   entryN
 * PP data
 *   PP for entry0
 *   ...
 *   PP for entryN
 *
 * An entry describes one or more consecutive stripe_heads, up to a full
 * stripe. The modified raid data chunks form an m-by-n matrix, where m is the
 * number of stripe_heads in the entry and n is the number of modified data
 * disks. Every stripe_head in the entry must write to the same data disks.
 * An example of a valid case described by a single entry (writes to the first
 * stripe of a 4 disk array, 16k chunk size):
 *
 * sh->sector   dd0   dd1   dd2    ppl
 *            +-----+-----+-----+
 * 0          | --- | --- | --- | +----+
 * 8          | -W- | -W- | --- | | pp |   data_sector = 8
 * 16         | -W- | -W- | --- | | pp |   data_size = 3 * 2 * 4k
 * 24         | -W- | -W- | --- | | pp |   pp_size = 3 * 4k
 *            +-----+-----+-----+ +----+
 *
 * data_sector is the first raid sector of the modified data, data_size is the
 * total size of modified data and pp_size is the size of partial parity for
 * this entry. Entries for full stripe writes contain no partial parity
 * (pp_size = 0), they only mark the stripes for which parity should be
 * recalculated after an unclean shutdown. Every entry holds a checksum of its
 * partial parity, the header also has a checksum of the header itself.
 *
 * A write request is always logged to the PPL instance stored on the parity
 * disk of the corresponding stripe. For each member disk there is one ppl_log
 * used to handle logging for this disk, independently from others. They are
 * grouped in child_logs array in struct ppl_conf, which is assigned to
 * r5conf->log_private.
 *
 * ppl_io_unit represents a full PPL write, header_page contains the ppl_header.
 * PPL entries for logged stripes are added in ppl_log_stripe(). A stripe_head
 * can be appended to the last entry if it meets the conditions for a valid
 * entry described above, otherwise a new entry is added. Checksums of entries
 * are calculated incrementally as stripes containing partial parity are being
 * added. ppl_submit_iounit() calculates the checksum of the header and submits
 * a bio containing the header page and partial parity pages (sh->ppl_page) for
 * all stripes of the io_unit. When the PPL write completes, the stripes
 * associated with the io_unit are released and raid5d starts writing their data
 * and parity. When all stripes are written, the io_unit is freed and the next
 * can be submitted.
 *
 * An io_unit is used to gather stripes until it is submitted or becomes full
 * (if the maximum number of entries or size of PPL is reached). Another io_unit
 * can't be submitted until the previous has completed (PPL and stripe
 * data+parity is written). The log->io_list tracks all io_units of a log
 * (for a single member disk). New io_units are added to the end of the list
 * and the first io_unit is submitted, if it is not submitted already.
 * The current io_unit accepting new stripes is always at the end of the list.
 *
 * If write-back cache is enabled for any of the disks in the array, its data
 * must be flushed before next io_unit is submitted.
 */
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci#define PPL_SPACE_SIZE (128 * 1024)
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistruct ppl_conf {
8862306a36Sopenharmony_ci	struct mddev *mddev;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	/* array of child logs, one for each raid disk */
9162306a36Sopenharmony_ci	struct ppl_log *child_logs;
9262306a36Sopenharmony_ci	int count;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	int block_size;		/* the logical block size used for data_sector
9562306a36Sopenharmony_ci				 * in ppl_header_entry */
9662306a36Sopenharmony_ci	u32 signature;		/* raid array identifier */
9762306a36Sopenharmony_ci	atomic64_t seq;		/* current log write sequence number */
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	struct kmem_cache *io_kc;
10062306a36Sopenharmony_ci	mempool_t io_pool;
10162306a36Sopenharmony_ci	struct bio_set bs;
10262306a36Sopenharmony_ci	struct bio_set flush_bs;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	/* used only for recovery */
10562306a36Sopenharmony_ci	int recovered_entries;
10662306a36Sopenharmony_ci	int mismatch_count;
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	/* stripes to retry if failed to allocate io_unit */
10962306a36Sopenharmony_ci	struct list_head no_mem_stripes;
11062306a36Sopenharmony_ci	spinlock_t no_mem_stripes_lock;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	unsigned short write_hint;
11362306a36Sopenharmony_ci};
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_cistruct ppl_log {
11662306a36Sopenharmony_ci	struct ppl_conf *ppl_conf;	/* shared between all log instances */
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	struct md_rdev *rdev;		/* array member disk associated with
11962306a36Sopenharmony_ci					 * this log instance */
12062306a36Sopenharmony_ci	struct mutex io_mutex;
12162306a36Sopenharmony_ci	struct ppl_io_unit *current_io;	/* current io_unit accepting new data
12262306a36Sopenharmony_ci					 * always at the end of io_list */
12362306a36Sopenharmony_ci	spinlock_t io_list_lock;
12462306a36Sopenharmony_ci	struct list_head io_list;	/* all io_units of this log */
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	sector_t next_io_sector;
12762306a36Sopenharmony_ci	unsigned int entry_space;
12862306a36Sopenharmony_ci	bool use_multippl;
12962306a36Sopenharmony_ci	bool wb_cache_on;
13062306a36Sopenharmony_ci	unsigned long disk_flush_bitmap;
13162306a36Sopenharmony_ci};
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci#define PPL_IO_INLINE_BVECS 32
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_cistruct ppl_io_unit {
13662306a36Sopenharmony_ci	struct ppl_log *log;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	struct page *header_page;	/* for ppl_header */
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	unsigned int entries_count;	/* number of entries in ppl_header */
14162306a36Sopenharmony_ci	unsigned int pp_size;		/* total size current of partial parity */
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	u64 seq;			/* sequence number of this log write */
14462306a36Sopenharmony_ci	struct list_head log_sibling;	/* log->io_list */
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	struct list_head stripe_list;	/* stripes added to the io_unit */
14762306a36Sopenharmony_ci	atomic_t pending_stripes;	/* how many stripes not written to raid */
14862306a36Sopenharmony_ci	atomic_t pending_flushes;	/* how many disk flushes are in progress */
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	bool submitted;			/* true if write to log started */
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	/* inline bio and its biovec for submitting the iounit */
15362306a36Sopenharmony_ci	struct bio bio;
15462306a36Sopenharmony_ci	struct bio_vec biovec[PPL_IO_INLINE_BVECS];
15562306a36Sopenharmony_ci};
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistruct dma_async_tx_descriptor *
15862306a36Sopenharmony_ciops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
15962306a36Sopenharmony_ci		       struct dma_async_tx_descriptor *tx)
16062306a36Sopenharmony_ci{
16162306a36Sopenharmony_ci	int disks = sh->disks;
16262306a36Sopenharmony_ci	struct page **srcs = percpu->scribble;
16362306a36Sopenharmony_ci	int count = 0, pd_idx = sh->pd_idx, i;
16462306a36Sopenharmony_ci	struct async_submit_ctl submit;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	/*
16962306a36Sopenharmony_ci	 * Partial parity is the XOR of stripe data chunks that are not changed
17062306a36Sopenharmony_ci	 * during the write request. Depending on available data
17162306a36Sopenharmony_ci	 * (read-modify-write vs. reconstruct-write case) we calculate it
17262306a36Sopenharmony_ci	 * differently.
17362306a36Sopenharmony_ci	 */
17462306a36Sopenharmony_ci	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
17562306a36Sopenharmony_ci		/*
17662306a36Sopenharmony_ci		 * rmw: xor old data and parity from updated disks
17762306a36Sopenharmony_ci		 * This is calculated earlier by ops_run_prexor5() so just copy
17862306a36Sopenharmony_ci		 * the parity dev page.
17962306a36Sopenharmony_ci		 */
18062306a36Sopenharmony_ci		srcs[count++] = sh->dev[pd_idx].page;
18162306a36Sopenharmony_ci	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
18262306a36Sopenharmony_ci		/* rcw: xor data from all not updated disks */
18362306a36Sopenharmony_ci		for (i = disks; i--;) {
18462306a36Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
18562306a36Sopenharmony_ci			if (test_bit(R5_UPTODATE, &dev->flags))
18662306a36Sopenharmony_ci				srcs[count++] = dev->page;
18762306a36Sopenharmony_ci		}
18862306a36Sopenharmony_ci	} else {
18962306a36Sopenharmony_ci		return tx;
19062306a36Sopenharmony_ci	}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
19362306a36Sopenharmony_ci			  NULL, sh, (void *) (srcs + sh->disks + 2));
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if (count == 1)
19662306a36Sopenharmony_ci		tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
19762306a36Sopenharmony_ci				  &submit);
19862306a36Sopenharmony_ci	else
19962306a36Sopenharmony_ci		tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE,
20062306a36Sopenharmony_ci			       &submit);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	return tx;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci	struct kmem_cache *kc = pool_data;
20862306a36Sopenharmony_ci	struct ppl_io_unit *io;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	io = kmem_cache_alloc(kc, gfp_mask);
21162306a36Sopenharmony_ci	if (!io)
21262306a36Sopenharmony_ci		return NULL;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	io->header_page = alloc_page(gfp_mask);
21562306a36Sopenharmony_ci	if (!io->header_page) {
21662306a36Sopenharmony_ci		kmem_cache_free(kc, io);
21762306a36Sopenharmony_ci		return NULL;
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	return io;
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cistatic void ppl_io_pool_free(void *element, void *pool_data)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	struct kmem_cache *kc = pool_data;
22662306a36Sopenharmony_ci	struct ppl_io_unit *io = element;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	__free_page(io->header_page);
22962306a36Sopenharmony_ci	kmem_cache_free(kc, io);
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_cistatic struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
23362306a36Sopenharmony_ci					  struct stripe_head *sh)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
23662306a36Sopenharmony_ci	struct ppl_io_unit *io;
23762306a36Sopenharmony_ci	struct ppl_header *pplhdr;
23862306a36Sopenharmony_ci	struct page *header_page;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT);
24162306a36Sopenharmony_ci	if (!io)
24262306a36Sopenharmony_ci		return NULL;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	header_page = io->header_page;
24562306a36Sopenharmony_ci	memset(io, 0, sizeof(*io));
24662306a36Sopenharmony_ci	io->header_page = header_page;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	io->log = log;
24962306a36Sopenharmony_ci	INIT_LIST_HEAD(&io->log_sibling);
25062306a36Sopenharmony_ci	INIT_LIST_HEAD(&io->stripe_list);
25162306a36Sopenharmony_ci	atomic_set(&io->pending_stripes, 0);
25262306a36Sopenharmony_ci	atomic_set(&io->pending_flushes, 0);
25362306a36Sopenharmony_ci	bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS,
25462306a36Sopenharmony_ci		 REQ_OP_WRITE | REQ_FUA);
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	pplhdr = page_address(io->header_page);
25762306a36Sopenharmony_ci	clear_page(pplhdr);
25862306a36Sopenharmony_ci	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
25962306a36Sopenharmony_ci	pplhdr->signature = cpu_to_le32(ppl_conf->signature);
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	io->seq = atomic64_add_return(1, &ppl_conf->seq);
26262306a36Sopenharmony_ci	pplhdr->generation = cpu_to_le64(io->seq);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	return io;
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_cistatic int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
26862306a36Sopenharmony_ci{
26962306a36Sopenharmony_ci	struct ppl_io_unit *io = log->current_io;
27062306a36Sopenharmony_ci	struct ppl_header_entry *e = NULL;
27162306a36Sopenharmony_ci	struct ppl_header *pplhdr;
27262306a36Sopenharmony_ci	int i;
27362306a36Sopenharmony_ci	sector_t data_sector = 0;
27462306a36Sopenharmony_ci	int data_disks = 0;
27562306a36Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	/* check if current io_unit is full */
28062306a36Sopenharmony_ci	if (io && (io->pp_size == log->entry_space ||
28162306a36Sopenharmony_ci		   io->entries_count == PPL_HDR_MAX_ENTRIES)) {
28262306a36Sopenharmony_ci		pr_debug("%s: add io_unit blocked by seq: %llu\n",
28362306a36Sopenharmony_ci			 __func__, io->seq);
28462306a36Sopenharmony_ci		io = NULL;
28562306a36Sopenharmony_ci	}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	/* add a new unit if there is none or the current is full */
28862306a36Sopenharmony_ci	if (!io) {
28962306a36Sopenharmony_ci		io = ppl_new_iounit(log, sh);
29062306a36Sopenharmony_ci		if (!io)
29162306a36Sopenharmony_ci			return -ENOMEM;
29262306a36Sopenharmony_ci		spin_lock_irq(&log->io_list_lock);
29362306a36Sopenharmony_ci		list_add_tail(&io->log_sibling, &log->io_list);
29462306a36Sopenharmony_ci		spin_unlock_irq(&log->io_list_lock);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		log->current_io = io;
29762306a36Sopenharmony_ci	}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	for (i = 0; i < sh->disks; i++) {
30062306a36Sopenharmony_ci		struct r5dev *dev = &sh->dev[i];
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci		if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) {
30362306a36Sopenharmony_ci			if (!data_disks || dev->sector < data_sector)
30462306a36Sopenharmony_ci				data_sector = dev->sector;
30562306a36Sopenharmony_ci			data_disks++;
30662306a36Sopenharmony_ci		}
30762306a36Sopenharmony_ci	}
30862306a36Sopenharmony_ci	BUG_ON(!data_disks);
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	pr_debug("%s: seq: %llu data_sector: %llu data_disks: %d\n", __func__,
31162306a36Sopenharmony_ci		 io->seq, (unsigned long long)data_sector, data_disks);
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	pplhdr = page_address(io->header_page);
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (io->entries_count > 0) {
31662306a36Sopenharmony_ci		struct ppl_header_entry *last =
31762306a36Sopenharmony_ci				&pplhdr->entries[io->entries_count - 1];
31862306a36Sopenharmony_ci		struct stripe_head *sh_last = list_last_entry(
31962306a36Sopenharmony_ci				&io->stripe_list, struct stripe_head, log_list);
32062306a36Sopenharmony_ci		u64 data_sector_last = le64_to_cpu(last->data_sector);
32162306a36Sopenharmony_ci		u32 data_size_last = le32_to_cpu(last->data_size);
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci		/*
32462306a36Sopenharmony_ci		 * Check if we can append the stripe to the last entry. It must
32562306a36Sopenharmony_ci		 * be just after the last logged stripe and write to the same
32662306a36Sopenharmony_ci		 * disks. Use bit shift and logarithm to avoid 64-bit division.
32762306a36Sopenharmony_ci		 */
32862306a36Sopenharmony_ci		if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) &&
32962306a36Sopenharmony_ci		    (data_sector >> ilog2(conf->chunk_sectors) ==
33062306a36Sopenharmony_ci		     data_sector_last >> ilog2(conf->chunk_sectors)) &&
33162306a36Sopenharmony_ci		    ((data_sector - data_sector_last) * data_disks ==
33262306a36Sopenharmony_ci		     data_size_last >> 9))
33362306a36Sopenharmony_ci			e = last;
33462306a36Sopenharmony_ci	}
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci	if (!e) {
33762306a36Sopenharmony_ci		e = &pplhdr->entries[io->entries_count++];
33862306a36Sopenharmony_ci		e->data_sector = cpu_to_le64(data_sector);
33962306a36Sopenharmony_ci		e->parity_disk = cpu_to_le32(sh->pd_idx);
34062306a36Sopenharmony_ci		e->checksum = cpu_to_le32(~0);
34162306a36Sopenharmony_ci	}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT);
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	/* don't write any PP if full stripe write */
34662306a36Sopenharmony_ci	if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
34762306a36Sopenharmony_ci		le32_add_cpu(&e->pp_size, PAGE_SIZE);
34862306a36Sopenharmony_ci		io->pp_size += PAGE_SIZE;
34962306a36Sopenharmony_ci		e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
35062306a36Sopenharmony_ci						    page_address(sh->ppl_page),
35162306a36Sopenharmony_ci						    PAGE_SIZE));
35262306a36Sopenharmony_ci	}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	list_add_tail(&sh->log_list, &io->stripe_list);
35562306a36Sopenharmony_ci	atomic_inc(&io->pending_stripes);
35662306a36Sopenharmony_ci	sh->ppl_io = io;
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	return 0;
35962306a36Sopenharmony_ci}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ciint ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
36462306a36Sopenharmony_ci	struct ppl_io_unit *io = sh->ppl_io;
36562306a36Sopenharmony_ci	struct ppl_log *log;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
36862306a36Sopenharmony_ci	    !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
36962306a36Sopenharmony_ci	    !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
37062306a36Sopenharmony_ci		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
37162306a36Sopenharmony_ci		return -EAGAIN;
37262306a36Sopenharmony_ci	}
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	log = &ppl_conf->child_logs[sh->pd_idx];
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci	mutex_lock(&log->io_mutex);
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
37962306a36Sopenharmony_ci		mutex_unlock(&log->io_mutex);
38062306a36Sopenharmony_ci		return -EAGAIN;
38162306a36Sopenharmony_ci	}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	set_bit(STRIPE_LOG_TRAPPED, &sh->state);
38462306a36Sopenharmony_ci	clear_bit(STRIPE_DELAYED, &sh->state);
38562306a36Sopenharmony_ci	atomic_inc(&sh->count);
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	if (ppl_log_stripe(log, sh)) {
38862306a36Sopenharmony_ci		spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
38962306a36Sopenharmony_ci		list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes);
39062306a36Sopenharmony_ci		spin_unlock_irq(&ppl_conf->no_mem_stripes_lock);
39162306a36Sopenharmony_ci	}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	mutex_unlock(&log->io_mutex);
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	return 0;
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_cistatic void ppl_log_endio(struct bio *bio)
39962306a36Sopenharmony_ci{
40062306a36Sopenharmony_ci	struct ppl_io_unit *io = bio->bi_private;
40162306a36Sopenharmony_ci	struct ppl_log *log = io->log;
40262306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
40362306a36Sopenharmony_ci	struct stripe_head *sh, *next;
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	pr_debug("%s: seq: %llu\n", __func__, io->seq);
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	if (bio->bi_status)
40862306a36Sopenharmony_ci		md_error(ppl_conf->mddev, log->rdev);
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
41162306a36Sopenharmony_ci		list_del_init(&sh->log_list);
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
41462306a36Sopenharmony_ci		raid5_release_stripe(sh);
41562306a36Sopenharmony_ci	}
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	pr_debug("%s: seq: %llu size: %u sector: %llu dev: %pg\n",
42162306a36Sopenharmony_ci		 __func__, io->seq, bio->bi_iter.bi_size,
42262306a36Sopenharmony_ci		 (unsigned long long)bio->bi_iter.bi_sector,
42362306a36Sopenharmony_ci		 bio->bi_bdev);
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	submit_bio(bio);
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic void ppl_submit_iounit(struct ppl_io_unit *io)
42962306a36Sopenharmony_ci{
43062306a36Sopenharmony_ci	struct ppl_log *log = io->log;
43162306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
43262306a36Sopenharmony_ci	struct ppl_header *pplhdr = page_address(io->header_page);
43362306a36Sopenharmony_ci	struct bio *bio = &io->bio;
43462306a36Sopenharmony_ci	struct stripe_head *sh;
43562306a36Sopenharmony_ci	int i;
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	bio->bi_private = io;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
44062306a36Sopenharmony_ci		ppl_log_endio(bio);
44162306a36Sopenharmony_ci		return;
44262306a36Sopenharmony_ci	}
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	for (i = 0; i < io->entries_count; i++) {
44562306a36Sopenharmony_ci		struct ppl_header_entry *e = &pplhdr->entries[i];
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci		pr_debug("%s: seq: %llu entry: %d data_sector: %llu pp_size: %u data_size: %u\n",
44862306a36Sopenharmony_ci			 __func__, io->seq, i, le64_to_cpu(e->data_sector),
44962306a36Sopenharmony_ci			 le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size));
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci		e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >>
45262306a36Sopenharmony_ci					     ilog2(ppl_conf->block_size >> 9));
45362306a36Sopenharmony_ci		e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum));
45462306a36Sopenharmony_ci	}
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	pplhdr->entries_count = cpu_to_le32(io->entries_count);
45762306a36Sopenharmony_ci	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci	/* Rewind the buffer if current PPL is larger then remaining space */
46062306a36Sopenharmony_ci	if (log->use_multippl &&
46162306a36Sopenharmony_ci	    log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
46262306a36Sopenharmony_ci	    (PPL_HEADER_SIZE + io->pp_size) >> 9)
46362306a36Sopenharmony_ci		log->next_io_sector = log->rdev->ppl.sector;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	bio->bi_end_io = ppl_log_endio;
46762306a36Sopenharmony_ci	bio->bi_iter.bi_sector = log->next_io_sector;
46862306a36Sopenharmony_ci	__bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci	pr_debug("%s: log->current_io_sector: %llu\n", __func__,
47162306a36Sopenharmony_ci	    (unsigned long long)log->next_io_sector);
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	if (log->use_multippl)
47462306a36Sopenharmony_ci		log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	WARN_ON(log->disk_flush_bitmap != 0);
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	list_for_each_entry(sh, &io->stripe_list, log_list) {
47962306a36Sopenharmony_ci		for (i = 0; i < sh->disks; i++) {
48062306a36Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci			if ((ppl_conf->child_logs[i].wb_cache_on) &&
48362306a36Sopenharmony_ci			    (test_bit(R5_Wantwrite, &dev->flags))) {
48462306a36Sopenharmony_ci				set_bit(i, &log->disk_flush_bitmap);
48562306a36Sopenharmony_ci			}
48662306a36Sopenharmony_ci		}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		/* entries for full stripe writes have no partial parity */
48962306a36Sopenharmony_ci		if (test_bit(STRIPE_FULL_WRITE, &sh->state))
49062306a36Sopenharmony_ci			continue;
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci		if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
49362306a36Sopenharmony_ci			struct bio *prev = bio;
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci			bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS,
49662306a36Sopenharmony_ci					       prev->bi_opf, GFP_NOIO,
49762306a36Sopenharmony_ci					       &ppl_conf->bs);
49862306a36Sopenharmony_ci			bio->bi_iter.bi_sector = bio_end_sector(prev);
49962306a36Sopenharmony_ci			__bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci			bio_chain(bio, prev);
50262306a36Sopenharmony_ci			ppl_submit_iounit_bio(io, prev);
50362306a36Sopenharmony_ci		}
50462306a36Sopenharmony_ci	}
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	ppl_submit_iounit_bio(io, bio);
50762306a36Sopenharmony_ci}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_cistatic void ppl_submit_current_io(struct ppl_log *log)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	struct ppl_io_unit *io;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	spin_lock_irq(&log->io_list_lock);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
51662306a36Sopenharmony_ci				      log_sibling);
51762306a36Sopenharmony_ci	if (io && io->submitted)
51862306a36Sopenharmony_ci		io = NULL;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	spin_unlock_irq(&log->io_list_lock);
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	if (io) {
52362306a36Sopenharmony_ci		io->submitted = true;
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci		if (io == log->current_io)
52662306a36Sopenharmony_ci			log->current_io = NULL;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci		ppl_submit_iounit(io);
52962306a36Sopenharmony_ci	}
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_civoid ppl_write_stripe_run(struct r5conf *conf)
53362306a36Sopenharmony_ci{
53462306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
53562306a36Sopenharmony_ci	struct ppl_log *log;
53662306a36Sopenharmony_ci	int i;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
53962306a36Sopenharmony_ci		log = &ppl_conf->child_logs[i];
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci		mutex_lock(&log->io_mutex);
54262306a36Sopenharmony_ci		ppl_submit_current_io(log);
54362306a36Sopenharmony_ci		mutex_unlock(&log->io_mutex);
54462306a36Sopenharmony_ci	}
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_cistatic void ppl_io_unit_finished(struct ppl_io_unit *io)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	struct ppl_log *log = io->log;
55062306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
55162306a36Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
55262306a36Sopenharmony_ci	unsigned long flags;
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	pr_debug("%s: seq: %llu\n", __func__, io->seq);
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	local_irq_save(flags);
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	spin_lock(&log->io_list_lock);
55962306a36Sopenharmony_ci	list_del(&io->log_sibling);
56062306a36Sopenharmony_ci	spin_unlock(&log->io_list_lock);
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	mempool_free(io, &ppl_conf->io_pool);
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	spin_lock(&ppl_conf->no_mem_stripes_lock);
56562306a36Sopenharmony_ci	if (!list_empty(&ppl_conf->no_mem_stripes)) {
56662306a36Sopenharmony_ci		struct stripe_head *sh;
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci		sh = list_first_entry(&ppl_conf->no_mem_stripes,
56962306a36Sopenharmony_ci				      struct stripe_head, log_list);
57062306a36Sopenharmony_ci		list_del_init(&sh->log_list);
57162306a36Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
57262306a36Sopenharmony_ci		raid5_release_stripe(sh);
57362306a36Sopenharmony_ci	}
57462306a36Sopenharmony_ci	spin_unlock(&ppl_conf->no_mem_stripes_lock);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	local_irq_restore(flags);
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	wake_up(&conf->wait_for_quiescent);
57962306a36Sopenharmony_ci}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_cistatic void ppl_flush_endio(struct bio *bio)
58262306a36Sopenharmony_ci{
58362306a36Sopenharmony_ci	struct ppl_io_unit *io = bio->bi_private;
58462306a36Sopenharmony_ci	struct ppl_log *log = io->log;
58562306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
58662306a36Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci	pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	if (bio->bi_status) {
59162306a36Sopenharmony_ci		struct md_rdev *rdev;
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci		rcu_read_lock();
59462306a36Sopenharmony_ci		rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio));
59562306a36Sopenharmony_ci		if (rdev)
59662306a36Sopenharmony_ci			md_error(rdev->mddev, rdev);
59762306a36Sopenharmony_ci		rcu_read_unlock();
59862306a36Sopenharmony_ci	}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	bio_put(bio);
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	if (atomic_dec_and_test(&io->pending_flushes)) {
60362306a36Sopenharmony_ci		ppl_io_unit_finished(io);
60462306a36Sopenharmony_ci		md_wakeup_thread(conf->mddev->thread);
60562306a36Sopenharmony_ci	}
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_cistatic void ppl_do_flush(struct ppl_io_unit *io)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	struct ppl_log *log = io->log;
61162306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
61262306a36Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
61362306a36Sopenharmony_ci	int raid_disks = conf->raid_disks;
61462306a36Sopenharmony_ci	int flushed_disks = 0;
61562306a36Sopenharmony_ci	int i;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	atomic_set(&io->pending_flushes, raid_disks);
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) {
62062306a36Sopenharmony_ci		struct md_rdev *rdev;
62162306a36Sopenharmony_ci		struct block_device *bdev = NULL;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci		rcu_read_lock();
62462306a36Sopenharmony_ci		rdev = rcu_dereference(conf->disks[i].rdev);
62562306a36Sopenharmony_ci		if (rdev && !test_bit(Faulty, &rdev->flags))
62662306a36Sopenharmony_ci			bdev = rdev->bdev;
62762306a36Sopenharmony_ci		rcu_read_unlock();
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci		if (bdev) {
63062306a36Sopenharmony_ci			struct bio *bio;
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci			bio = bio_alloc_bioset(bdev, 0,
63362306a36Sopenharmony_ci					       REQ_OP_WRITE | REQ_PREFLUSH,
63462306a36Sopenharmony_ci					       GFP_NOIO, &ppl_conf->flush_bs);
63562306a36Sopenharmony_ci			bio->bi_private = io;
63662306a36Sopenharmony_ci			bio->bi_end_io = ppl_flush_endio;
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci			pr_debug("%s: dev: %ps\n", __func__, bio->bi_bdev);
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci			submit_bio(bio);
64162306a36Sopenharmony_ci			flushed_disks++;
64262306a36Sopenharmony_ci		}
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	log->disk_flush_bitmap = 0;
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	for (i = flushed_disks ; i < raid_disks; i++) {
64862306a36Sopenharmony_ci		if (atomic_dec_and_test(&io->pending_flushes))
64962306a36Sopenharmony_ci			ppl_io_unit_finished(io);
65062306a36Sopenharmony_ci	}
65162306a36Sopenharmony_ci}
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_cistatic inline bool ppl_no_io_unit_submitted(struct r5conf *conf,
65462306a36Sopenharmony_ci					    struct ppl_log *log)
65562306a36Sopenharmony_ci{
65662306a36Sopenharmony_ci	struct ppl_io_unit *io;
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
65962306a36Sopenharmony_ci				      log_sibling);
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	return !io || !io->submitted;
66262306a36Sopenharmony_ci}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_civoid ppl_quiesce(struct r5conf *conf, int quiesce)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
66762306a36Sopenharmony_ci	int i;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	if (quiesce) {
67062306a36Sopenharmony_ci		for (i = 0; i < ppl_conf->count; i++) {
67162306a36Sopenharmony_ci			struct ppl_log *log = &ppl_conf->child_logs[i];
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci			spin_lock_irq(&log->io_list_lock);
67462306a36Sopenharmony_ci			wait_event_lock_irq(conf->wait_for_quiescent,
67562306a36Sopenharmony_ci					    ppl_no_io_unit_submitted(conf, log),
67662306a36Sopenharmony_ci					    log->io_list_lock);
67762306a36Sopenharmony_ci			spin_unlock_irq(&log->io_list_lock);
67862306a36Sopenharmony_ci		}
67962306a36Sopenharmony_ci	}
68062306a36Sopenharmony_ci}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ciint ppl_handle_flush_request(struct bio *bio)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	if (bio->bi_iter.bi_size == 0) {
68562306a36Sopenharmony_ci		bio_endio(bio);
68662306a36Sopenharmony_ci		return 0;
68762306a36Sopenharmony_ci	}
68862306a36Sopenharmony_ci	bio->bi_opf &= ~REQ_PREFLUSH;
68962306a36Sopenharmony_ci	return -EAGAIN;
69062306a36Sopenharmony_ci}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_civoid ppl_stripe_write_finished(struct stripe_head *sh)
69362306a36Sopenharmony_ci{
69462306a36Sopenharmony_ci	struct ppl_io_unit *io;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	io = sh->ppl_io;
69762306a36Sopenharmony_ci	sh->ppl_io = NULL;
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	if (io && atomic_dec_and_test(&io->pending_stripes)) {
70062306a36Sopenharmony_ci		if (io->log->disk_flush_bitmap)
70162306a36Sopenharmony_ci			ppl_do_flush(io);
70262306a36Sopenharmony_ci		else
70362306a36Sopenharmony_ci			ppl_io_unit_finished(io);
70462306a36Sopenharmony_ci	}
70562306a36Sopenharmony_ci}
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_cistatic void ppl_xor(int size, struct page *page1, struct page *page2)
70862306a36Sopenharmony_ci{
70962306a36Sopenharmony_ci	struct async_submit_ctl submit;
71062306a36Sopenharmony_ci	struct dma_async_tx_descriptor *tx;
71162306a36Sopenharmony_ci	struct page *xor_srcs[] = { page1, page2 };
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	init_async_submit(&submit, ASYNC_TX_ACK|ASYNC_TX_XOR_DROP_DST,
71462306a36Sopenharmony_ci			  NULL, NULL, NULL, NULL);
71562306a36Sopenharmony_ci	tx = async_xor(page1, xor_srcs, 0, 2, size, &submit);
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	async_tx_quiesce(&tx);
71862306a36Sopenharmony_ci}
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci/*
72162306a36Sopenharmony_ci * PPL recovery strategy: xor partial parity and data from all modified data
72262306a36Sopenharmony_ci * disks within a stripe and write the result as the new stripe parity. If all
72362306a36Sopenharmony_ci * stripe data disks are modified (full stripe write), no partial parity is
72462306a36Sopenharmony_ci * available, so just xor the data disks.
72562306a36Sopenharmony_ci *
 * Recovery of a PPL entry shall occur only if all modified data disks are
 * available and the reads from all of them succeed.
72862306a36Sopenharmony_ci *
72962306a36Sopenharmony_ci * A PPL entry applies to a stripe, partial parity size for an entry is at most
73062306a36Sopenharmony_ci * the size of the chunk. Examples of possible cases for a single entry:
73162306a36Sopenharmony_ci *
73262306a36Sopenharmony_ci * case 0: single data disk write:
73362306a36Sopenharmony_ci *   data0    data1    data2     ppl        parity
73462306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
73562306a36Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
73662306a36Sopenharmony_ci * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
73762306a36Sopenharmony_ci * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
73862306a36Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
73962306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
74062306a36Sopenharmony_ci * pp_size = data_size
74162306a36Sopenharmony_ci *
74262306a36Sopenharmony_ci * case 1: more than one data disk write:
74362306a36Sopenharmony_ci *   data0    data1    data2     ppl        parity
74462306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
74562306a36Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
74662306a36Sopenharmony_ci * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
74762306a36Sopenharmony_ci * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
74862306a36Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
74962306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
75062306a36Sopenharmony_ci * pp_size = data_size / modified_data_disks
75162306a36Sopenharmony_ci *
75262306a36Sopenharmony_ci * case 2: write to all data disks (also full stripe write):
75362306a36Sopenharmony_ci *   data0    data1    data2                parity
75462306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
75562306a36Sopenharmony_ci * | ------ | ------ | ------ |           | (no change)        |
75662306a36Sopenharmony_ci * | -data- | -data- | -data- | --------> | xor all data       |
75762306a36Sopenharmony_ci * | ------ | ------ | ------ | --------> | (no change)        |
75862306a36Sopenharmony_ci * | ------ | ------ | ------ |           | (no change)        |
75962306a36Sopenharmony_ci * +--------+--------+--------+           +--------------------+
76062306a36Sopenharmony_ci * pp_size = 0
76162306a36Sopenharmony_ci *
76262306a36Sopenharmony_ci * The following cases are possible only in other implementations. The recovery
76362306a36Sopenharmony_ci * code can handle them, but they are not generated at runtime because they can
76462306a36Sopenharmony_ci * be reduced to cases 0, 1 and 2:
76562306a36Sopenharmony_ci *
76662306a36Sopenharmony_ci * case 3:
76762306a36Sopenharmony_ci *   data0    data1    data2     ppl        parity
76862306a36Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
76962306a36Sopenharmony_ci * | ------ | -data- | -data- | | pp |    | data1 ^ data2 ^ pp |
77062306a36Sopenharmony_ci * | ------ | -data- | -data- | | pp | -> | data1 ^ data2 ^ pp |
77162306a36Sopenharmony_ci * | -data- | -data- | -data- | | -- | -> | xor all data       |
77262306a36Sopenharmony_ci * | -data- | -data- | ------ | | pp |    | data0 ^ data1 ^ pp |
77362306a36Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
77462306a36Sopenharmony_ci * pp_size = chunk_size
77562306a36Sopenharmony_ci *
77662306a36Sopenharmony_ci * case 4:
77762306a36Sopenharmony_ci *   data0    data1    data2     ppl        parity
77862306a36Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
77962306a36Sopenharmony_ci * | ------ | -data- | ------ | | pp |    | data1 ^ pp         |
78062306a36Sopenharmony_ci * | ------ | ------ | ------ | | -- | -> | (no change)        |
78162306a36Sopenharmony_ci * | ------ | ------ | ------ | | -- | -> | (no change)        |
78262306a36Sopenharmony_ci * | -data- | ------ | ------ | | pp |    | data0 ^ pp         |
78362306a36Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
78462306a36Sopenharmony_ci * pp_size = chunk_size
78562306a36Sopenharmony_ci */
78662306a36Sopenharmony_cistatic int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
78762306a36Sopenharmony_ci			     sector_t ppl_sector)
78862306a36Sopenharmony_ci{
78962306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
79062306a36Sopenharmony_ci	struct mddev *mddev = ppl_conf->mddev;
79162306a36Sopenharmony_ci	struct r5conf *conf = mddev->private;
79262306a36Sopenharmony_ci	int block_size = ppl_conf->block_size;
79362306a36Sopenharmony_ci	struct page *page1;
79462306a36Sopenharmony_ci	struct page *page2;
79562306a36Sopenharmony_ci	sector_t r_sector_first;
79662306a36Sopenharmony_ci	sector_t r_sector_last;
79762306a36Sopenharmony_ci	int strip_sectors;
79862306a36Sopenharmony_ci	int data_disks;
79962306a36Sopenharmony_ci	int i;
80062306a36Sopenharmony_ci	int ret = 0;
80162306a36Sopenharmony_ci	unsigned int pp_size = le32_to_cpu(e->pp_size);
80262306a36Sopenharmony_ci	unsigned int data_size = le32_to_cpu(e->data_size);
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	page1 = alloc_page(GFP_KERNEL);
80562306a36Sopenharmony_ci	page2 = alloc_page(GFP_KERNEL);
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	if (!page1 || !page2) {
80862306a36Sopenharmony_ci		ret = -ENOMEM;
80962306a36Sopenharmony_ci		goto out;
81062306a36Sopenharmony_ci	}
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci	r_sector_first = le64_to_cpu(e->data_sector) * (block_size >> 9);
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci	if ((pp_size >> 9) < conf->chunk_sectors) {
81562306a36Sopenharmony_ci		if (pp_size > 0) {
81662306a36Sopenharmony_ci			data_disks = data_size / pp_size;
81762306a36Sopenharmony_ci			strip_sectors = pp_size >> 9;
81862306a36Sopenharmony_ci		} else {
81962306a36Sopenharmony_ci			data_disks = conf->raid_disks - conf->max_degraded;
82062306a36Sopenharmony_ci			strip_sectors = (data_size >> 9) / data_disks;
82162306a36Sopenharmony_ci		}
82262306a36Sopenharmony_ci		r_sector_last = r_sector_first +
82362306a36Sopenharmony_ci				(data_disks - 1) * conf->chunk_sectors +
82462306a36Sopenharmony_ci				strip_sectors;
82562306a36Sopenharmony_ci	} else {
82662306a36Sopenharmony_ci		data_disks = conf->raid_disks - conf->max_degraded;
82762306a36Sopenharmony_ci		strip_sectors = conf->chunk_sectors;
82862306a36Sopenharmony_ci		r_sector_last = r_sector_first + (data_size >> 9);
82962306a36Sopenharmony_ci	}
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	pr_debug("%s: array sector first: %llu last: %llu\n", __func__,
83262306a36Sopenharmony_ci		 (unsigned long long)r_sector_first,
83362306a36Sopenharmony_ci		 (unsigned long long)r_sector_last);
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	/* if start and end is 4k aligned, use a 4k block */
83662306a36Sopenharmony_ci	if (block_size == 512 &&
83762306a36Sopenharmony_ci	    (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
83862306a36Sopenharmony_ci	    (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
83962306a36Sopenharmony_ci		block_size = RAID5_STRIPE_SIZE(conf);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	/* iterate through blocks in strip */
84262306a36Sopenharmony_ci	for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
84362306a36Sopenharmony_ci		bool update_parity = false;
84462306a36Sopenharmony_ci		sector_t parity_sector;
84562306a36Sopenharmony_ci		struct md_rdev *parity_rdev;
84662306a36Sopenharmony_ci		struct stripe_head sh;
84762306a36Sopenharmony_ci		int disk;
84862306a36Sopenharmony_ci		int indent = 0;
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci		pr_debug("%s:%*s iter %d start\n", __func__, indent, "", i);
85162306a36Sopenharmony_ci		indent += 2;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci		memset(page_address(page1), 0, PAGE_SIZE);
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci		/* iterate through data member disks */
85662306a36Sopenharmony_ci		for (disk = 0; disk < data_disks; disk++) {
85762306a36Sopenharmony_ci			int dd_idx;
85862306a36Sopenharmony_ci			struct md_rdev *rdev;
85962306a36Sopenharmony_ci			sector_t sector;
86062306a36Sopenharmony_ci			sector_t r_sector = r_sector_first + i +
86162306a36Sopenharmony_ci					    (disk * conf->chunk_sectors);
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci			pr_debug("%s:%*s data member disk %d start\n",
86462306a36Sopenharmony_ci				 __func__, indent, "", disk);
86562306a36Sopenharmony_ci			indent += 2;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci			if (r_sector >= r_sector_last) {
86862306a36Sopenharmony_ci				pr_debug("%s:%*s array sector %llu doesn't need parity update\n",
86962306a36Sopenharmony_ci					 __func__, indent, "",
87062306a36Sopenharmony_ci					 (unsigned long long)r_sector);
87162306a36Sopenharmony_ci				indent -= 2;
87262306a36Sopenharmony_ci				continue;
87362306a36Sopenharmony_ci			}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci			update_parity = true;
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci			/* map raid sector to member disk */
87862306a36Sopenharmony_ci			sector = raid5_compute_sector(conf, r_sector, 0,
87962306a36Sopenharmony_ci						      &dd_idx, NULL);
88062306a36Sopenharmony_ci			pr_debug("%s:%*s processing array sector %llu => data member disk %d, sector %llu\n",
88162306a36Sopenharmony_ci				 __func__, indent, "",
88262306a36Sopenharmony_ci				 (unsigned long long)r_sector, dd_idx,
88362306a36Sopenharmony_ci				 (unsigned long long)sector);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci			/* Array has not started so rcu dereference is safe */
88662306a36Sopenharmony_ci			rdev = rcu_dereference_protected(
88762306a36Sopenharmony_ci					conf->disks[dd_idx].rdev, 1);
88862306a36Sopenharmony_ci			if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
88962306a36Sopenharmony_ci				      sector >= rdev->recovery_offset)) {
89062306a36Sopenharmony_ci				pr_debug("%s:%*s data member disk %d missing\n",
89162306a36Sopenharmony_ci					 __func__, indent, "", dd_idx);
89262306a36Sopenharmony_ci				update_parity = false;
89362306a36Sopenharmony_ci				break;
89462306a36Sopenharmony_ci			}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci			pr_debug("%s:%*s reading data member disk %pg sector %llu\n",
89762306a36Sopenharmony_ci				 __func__, indent, "", rdev->bdev,
89862306a36Sopenharmony_ci				 (unsigned long long)sector);
89962306a36Sopenharmony_ci			if (!sync_page_io(rdev, sector, block_size, page2,
90062306a36Sopenharmony_ci					REQ_OP_READ, false)) {
90162306a36Sopenharmony_ci				md_error(mddev, rdev);
90262306a36Sopenharmony_ci				pr_debug("%s:%*s read failed!\n", __func__,
90362306a36Sopenharmony_ci					 indent, "");
90462306a36Sopenharmony_ci				ret = -EIO;
90562306a36Sopenharmony_ci				goto out;
90662306a36Sopenharmony_ci			}
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci			ppl_xor(block_size, page1, page2);
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci			indent -= 2;
91162306a36Sopenharmony_ci		}
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci		if (!update_parity)
91462306a36Sopenharmony_ci			continue;
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci		if (pp_size > 0) {
91762306a36Sopenharmony_ci			pr_debug("%s:%*s reading pp disk sector %llu\n",
91862306a36Sopenharmony_ci				 __func__, indent, "",
91962306a36Sopenharmony_ci				 (unsigned long long)(ppl_sector + i));
92062306a36Sopenharmony_ci			if (!sync_page_io(log->rdev,
92162306a36Sopenharmony_ci					ppl_sector - log->rdev->data_offset + i,
92262306a36Sopenharmony_ci					block_size, page2, REQ_OP_READ,
92362306a36Sopenharmony_ci					false)) {
92462306a36Sopenharmony_ci				pr_debug("%s:%*s read failed!\n", __func__,
92562306a36Sopenharmony_ci					 indent, "");
92662306a36Sopenharmony_ci				md_error(mddev, log->rdev);
92762306a36Sopenharmony_ci				ret = -EIO;
92862306a36Sopenharmony_ci				goto out;
92962306a36Sopenharmony_ci			}
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci			ppl_xor(block_size, page1, page2);
93262306a36Sopenharmony_ci		}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci		/* map raid sector to parity disk */
93562306a36Sopenharmony_ci		parity_sector = raid5_compute_sector(conf, r_sector_first + i,
93662306a36Sopenharmony_ci				0, &disk, &sh);
93762306a36Sopenharmony_ci		BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci		/* Array has not started so rcu dereference is safe */
94062306a36Sopenharmony_ci		parity_rdev = rcu_dereference_protected(
94162306a36Sopenharmony_ci					conf->disks[sh.pd_idx].rdev, 1);
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci		BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
94462306a36Sopenharmony_ci		pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
94562306a36Sopenharmony_ci			 __func__, indent, "",
94662306a36Sopenharmony_ci			 (unsigned long long)parity_sector,
94762306a36Sopenharmony_ci			 parity_rdev->bdev);
94862306a36Sopenharmony_ci		if (!sync_page_io(parity_rdev, parity_sector, block_size,
94962306a36Sopenharmony_ci				  page1, REQ_OP_WRITE, false)) {
95062306a36Sopenharmony_ci			pr_debug("%s:%*s parity write error!\n", __func__,
95162306a36Sopenharmony_ci				 indent, "");
95262306a36Sopenharmony_ci			md_error(mddev, parity_rdev);
95362306a36Sopenharmony_ci			ret = -EIO;
95462306a36Sopenharmony_ci			goto out;
95562306a36Sopenharmony_ci		}
95662306a36Sopenharmony_ci	}
95762306a36Sopenharmony_ciout:
95862306a36Sopenharmony_ci	if (page1)
95962306a36Sopenharmony_ci		__free_page(page1);
96062306a36Sopenharmony_ci	if (page2)
96162306a36Sopenharmony_ci		__free_page(page2);
96262306a36Sopenharmony_ci	return ret;
96362306a36Sopenharmony_ci}
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_cistatic int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
96662306a36Sopenharmony_ci		       sector_t offset)
96762306a36Sopenharmony_ci{
96862306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
96962306a36Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
97062306a36Sopenharmony_ci	struct mddev *mddev = rdev->mddev;
97162306a36Sopenharmony_ci	sector_t ppl_sector = rdev->ppl.sector + offset +
97262306a36Sopenharmony_ci			      (PPL_HEADER_SIZE >> 9);
97362306a36Sopenharmony_ci	struct page *page;
97462306a36Sopenharmony_ci	int i;
97562306a36Sopenharmony_ci	int ret = 0;
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
97862306a36Sopenharmony_ci	if (!page)
97962306a36Sopenharmony_ci		return -ENOMEM;
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	/* iterate through all PPL entries saved */
98262306a36Sopenharmony_ci	for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) {
98362306a36Sopenharmony_ci		struct ppl_header_entry *e = &pplhdr->entries[i];
98462306a36Sopenharmony_ci		u32 pp_size = le32_to_cpu(e->pp_size);
98562306a36Sopenharmony_ci		sector_t sector = ppl_sector;
98662306a36Sopenharmony_ci		int ppl_entry_sectors = pp_size >> 9;
98762306a36Sopenharmony_ci		u32 crc, crc_stored;
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci		pr_debug("%s: disk: %d entry: %d ppl_sector: %llu pp_size: %u\n",
99062306a36Sopenharmony_ci			 __func__, rdev->raid_disk, i,
99162306a36Sopenharmony_ci			 (unsigned long long)ppl_sector, pp_size);
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci		crc = ~0;
99462306a36Sopenharmony_ci		crc_stored = le32_to_cpu(e->checksum);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci		/* read parial parity for this entry and calculate its checksum */
99762306a36Sopenharmony_ci		while (pp_size) {
99862306a36Sopenharmony_ci			int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci			if (!sync_page_io(rdev, sector - rdev->data_offset,
100162306a36Sopenharmony_ci					s, page, REQ_OP_READ, false)) {
100262306a36Sopenharmony_ci				md_error(mddev, rdev);
100362306a36Sopenharmony_ci				ret = -EIO;
100462306a36Sopenharmony_ci				goto out;
100562306a36Sopenharmony_ci			}
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci			crc = crc32c_le(crc, page_address(page), s);
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci			pp_size -= s;
101062306a36Sopenharmony_ci			sector += s >> 9;
101162306a36Sopenharmony_ci		}
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci		crc = ~crc;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci		if (crc != crc_stored) {
101662306a36Sopenharmony_ci			/*
101762306a36Sopenharmony_ci			 * Don't recover this entry if the checksum does not
101862306a36Sopenharmony_ci			 * match, but keep going and try to recover other
101962306a36Sopenharmony_ci			 * entries.
102062306a36Sopenharmony_ci			 */
102162306a36Sopenharmony_ci			pr_debug("%s: ppl entry crc does not match: stored: 0x%x calculated: 0x%x\n",
102262306a36Sopenharmony_ci				 __func__, crc_stored, crc);
102362306a36Sopenharmony_ci			ppl_conf->mismatch_count++;
102462306a36Sopenharmony_ci		} else {
102562306a36Sopenharmony_ci			ret = ppl_recover_entry(log, e, ppl_sector);
102662306a36Sopenharmony_ci			if (ret)
102762306a36Sopenharmony_ci				goto out;
102862306a36Sopenharmony_ci			ppl_conf->recovered_entries++;
102962306a36Sopenharmony_ci		}
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci		ppl_sector += ppl_entry_sectors;
103262306a36Sopenharmony_ci	}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	/* flush the disk cache after recovery if necessary */
103562306a36Sopenharmony_ci	ret = blkdev_issue_flush(rdev->bdev);
103662306a36Sopenharmony_ciout:
103762306a36Sopenharmony_ci	__free_page(page);
103862306a36Sopenharmony_ci	return ret;
103962306a36Sopenharmony_ci}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_cistatic int ppl_write_empty_header(struct ppl_log *log)
104262306a36Sopenharmony_ci{
104362306a36Sopenharmony_ci	struct page *page;
104462306a36Sopenharmony_ci	struct ppl_header *pplhdr;
104562306a36Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
104662306a36Sopenharmony_ci	int ret = 0;
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci	pr_debug("%s: disk: %d ppl_sector: %llu\n", __func__,
104962306a36Sopenharmony_ci		 rdev->raid_disk, (unsigned long long)rdev->ppl.sector);
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	page = alloc_page(GFP_NOIO | __GFP_ZERO);
105262306a36Sopenharmony_ci	if (!page)
105362306a36Sopenharmony_ci		return -ENOMEM;
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci	pplhdr = page_address(page);
105662306a36Sopenharmony_ci	/* zero out PPL space to avoid collision with old PPLs */
105762306a36Sopenharmony_ci	blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
105862306a36Sopenharmony_ci			    log->rdev->ppl.size, GFP_NOIO, 0);
105962306a36Sopenharmony_ci	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
106062306a36Sopenharmony_ci	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
106162306a36Sopenharmony_ci	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
106462306a36Sopenharmony_ci			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
106562306a36Sopenharmony_ci			  REQ_FUA, false)) {
106662306a36Sopenharmony_ci		md_error(rdev->mddev, rdev);
106762306a36Sopenharmony_ci		ret = -EIO;
106862306a36Sopenharmony_ci	}
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_ci	__free_page(page);
107162306a36Sopenharmony_ci	return ret;
107262306a36Sopenharmony_ci}
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_cistatic int ppl_load_distributed(struct ppl_log *log)
107562306a36Sopenharmony_ci{
107662306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
107762306a36Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
107862306a36Sopenharmony_ci	struct mddev *mddev = rdev->mddev;
107962306a36Sopenharmony_ci	struct page *page, *page2;
108062306a36Sopenharmony_ci	struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
108162306a36Sopenharmony_ci	u32 crc, crc_stored;
108262306a36Sopenharmony_ci	u32 signature;
108362306a36Sopenharmony_ci	int ret = 0, i;
108462306a36Sopenharmony_ci	sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
108762306a36Sopenharmony_ci	/* read PPL headers, find the recent one */
108862306a36Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
108962306a36Sopenharmony_ci	if (!page)
109062306a36Sopenharmony_ci		return -ENOMEM;
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci	page2 = alloc_page(GFP_KERNEL);
109362306a36Sopenharmony_ci	if (!page2) {
109462306a36Sopenharmony_ci		__free_page(page);
109562306a36Sopenharmony_ci		return -ENOMEM;
109662306a36Sopenharmony_ci	}
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	/* searching ppl area for latest ppl */
109962306a36Sopenharmony_ci	while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
110062306a36Sopenharmony_ci		if (!sync_page_io(rdev,
110162306a36Sopenharmony_ci				  rdev->ppl.sector - rdev->data_offset +
110262306a36Sopenharmony_ci				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
110362306a36Sopenharmony_ci				  false)) {
110462306a36Sopenharmony_ci			md_error(mddev, rdev);
110562306a36Sopenharmony_ci			ret = -EIO;
110662306a36Sopenharmony_ci			/* if not able to read - don't recover any PPL */
110762306a36Sopenharmony_ci			pplhdr = NULL;
110862306a36Sopenharmony_ci			break;
110962306a36Sopenharmony_ci		}
111062306a36Sopenharmony_ci		pplhdr = page_address(page);
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci		/* check header validity */
111362306a36Sopenharmony_ci		crc_stored = le32_to_cpu(pplhdr->checksum);
111462306a36Sopenharmony_ci		pplhdr->checksum = 0;
111562306a36Sopenharmony_ci		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci		if (crc_stored != crc) {
111862306a36Sopenharmony_ci			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
111962306a36Sopenharmony_ci				 __func__, crc_stored, crc,
112062306a36Sopenharmony_ci				 (unsigned long long)pplhdr_offset);
112162306a36Sopenharmony_ci			pplhdr = prev_pplhdr;
112262306a36Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
112362306a36Sopenharmony_ci			break;
112462306a36Sopenharmony_ci		}
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci		signature = le32_to_cpu(pplhdr->signature);
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci		if (mddev->external) {
112962306a36Sopenharmony_ci			/*
113062306a36Sopenharmony_ci			 * For external metadata the header signature is set and
113162306a36Sopenharmony_ci			 * validated in userspace.
113262306a36Sopenharmony_ci			 */
113362306a36Sopenharmony_ci			ppl_conf->signature = signature;
113462306a36Sopenharmony_ci		} else if (ppl_conf->signature != signature) {
113562306a36Sopenharmony_ci			pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
113662306a36Sopenharmony_ci				 __func__, signature, ppl_conf->signature,
113762306a36Sopenharmony_ci				 (unsigned long long)pplhdr_offset);
113862306a36Sopenharmony_ci			pplhdr = prev_pplhdr;
113962306a36Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
114062306a36Sopenharmony_ci			break;
114162306a36Sopenharmony_ci		}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci		if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
114462306a36Sopenharmony_ci		    le64_to_cpu(pplhdr->generation)) {
114562306a36Sopenharmony_ci			/* previous was newest */
114662306a36Sopenharmony_ci			pplhdr = prev_pplhdr;
114762306a36Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
114862306a36Sopenharmony_ci			break;
114962306a36Sopenharmony_ci		}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci		prev_pplhdr_offset = pplhdr_offset;
115262306a36Sopenharmony_ci		prev_pplhdr = pplhdr;
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci		swap(page, page2);
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci		/* calculate next potential ppl offset */
115762306a36Sopenharmony_ci		for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
115862306a36Sopenharmony_ci			pplhdr_offset +=
115962306a36Sopenharmony_ci			    le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
116062306a36Sopenharmony_ci		pplhdr_offset += PPL_HEADER_SIZE >> 9;
116162306a36Sopenharmony_ci	}
116262306a36Sopenharmony_ci
116362306a36Sopenharmony_ci	/* no valid ppl found */
116462306a36Sopenharmony_ci	if (!pplhdr)
116562306a36Sopenharmony_ci		ppl_conf->mismatch_count++;
116662306a36Sopenharmony_ci	else
116762306a36Sopenharmony_ci		pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
116862306a36Sopenharmony_ci		    __func__, (unsigned long long)pplhdr_offset,
116962306a36Sopenharmony_ci		    le64_to_cpu(pplhdr->generation));
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	/* attempt to recover from log if we are starting a dirty array */
117262306a36Sopenharmony_ci	if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
117362306a36Sopenharmony_ci		ret = ppl_recover(log, pplhdr, pplhdr_offset);
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci	/* write empty header if we are starting the array */
117662306a36Sopenharmony_ci	if (!ret && !mddev->pers)
117762306a36Sopenharmony_ci		ret = ppl_write_empty_header(log);
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci	__free_page(page);
118062306a36Sopenharmony_ci	__free_page(page2);
118162306a36Sopenharmony_ci
118262306a36Sopenharmony_ci	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
118362306a36Sopenharmony_ci		 __func__, ret, ppl_conf->mismatch_count,
118462306a36Sopenharmony_ci		 ppl_conf->recovered_entries);
118562306a36Sopenharmony_ci	return ret;
118662306a36Sopenharmony_ci}
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_cistatic int ppl_load(struct ppl_conf *ppl_conf)
118962306a36Sopenharmony_ci{
119062306a36Sopenharmony_ci	int ret = 0;
119162306a36Sopenharmony_ci	u32 signature = 0;
119262306a36Sopenharmony_ci	bool signature_set = false;
119362306a36Sopenharmony_ci	int i;
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
119662306a36Sopenharmony_ci		struct ppl_log *log = &ppl_conf->child_logs[i];
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci		/* skip missing drive */
119962306a36Sopenharmony_ci		if (!log->rdev)
120062306a36Sopenharmony_ci			continue;
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_ci		ret = ppl_load_distributed(log);
120362306a36Sopenharmony_ci		if (ret)
120462306a36Sopenharmony_ci			break;
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_ci		/*
120762306a36Sopenharmony_ci		 * For external metadata we can't check if the signature is
120862306a36Sopenharmony_ci		 * correct on a single drive, but we can check if it is the same
120962306a36Sopenharmony_ci		 * on all drives.
121062306a36Sopenharmony_ci		 */
121162306a36Sopenharmony_ci		if (ppl_conf->mddev->external) {
121262306a36Sopenharmony_ci			if (!signature_set) {
121362306a36Sopenharmony_ci				signature = ppl_conf->signature;
121462306a36Sopenharmony_ci				signature_set = true;
121562306a36Sopenharmony_ci			} else if (signature != ppl_conf->signature) {
121662306a36Sopenharmony_ci				pr_warn("md/raid:%s: PPL header signature does not match on all member drives\n",
121762306a36Sopenharmony_ci					mdname(ppl_conf->mddev));
121862306a36Sopenharmony_ci				ret = -EINVAL;
121962306a36Sopenharmony_ci				break;
122062306a36Sopenharmony_ci			}
122162306a36Sopenharmony_ci		}
122262306a36Sopenharmony_ci	}
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
122562306a36Sopenharmony_ci		 __func__, ret, ppl_conf->mismatch_count,
122662306a36Sopenharmony_ci		 ppl_conf->recovered_entries);
122762306a36Sopenharmony_ci	return ret;
122862306a36Sopenharmony_ci}
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_cistatic void __ppl_exit_log(struct ppl_conf *ppl_conf)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
123362306a36Sopenharmony_ci	clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci	kfree(ppl_conf->child_logs);
123662306a36Sopenharmony_ci
123762306a36Sopenharmony_ci	bioset_exit(&ppl_conf->bs);
123862306a36Sopenharmony_ci	bioset_exit(&ppl_conf->flush_bs);
123962306a36Sopenharmony_ci	mempool_exit(&ppl_conf->io_pool);
124062306a36Sopenharmony_ci	kmem_cache_destroy(ppl_conf->io_kc);
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	kfree(ppl_conf);
124362306a36Sopenharmony_ci}
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_civoid ppl_exit_log(struct r5conf *conf)
124662306a36Sopenharmony_ci{
124762306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_ci	if (ppl_conf) {
125062306a36Sopenharmony_ci		__ppl_exit_log(ppl_conf);
125162306a36Sopenharmony_ci		conf->log_private = NULL;
125262306a36Sopenharmony_ci	}
125362306a36Sopenharmony_ci}
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_cistatic int ppl_validate_rdev(struct md_rdev *rdev)
125662306a36Sopenharmony_ci{
125762306a36Sopenharmony_ci	int ppl_data_sectors;
125862306a36Sopenharmony_ci	int ppl_size_new;
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	/*
126162306a36Sopenharmony_ci	 * The configured PPL size must be enough to store
126262306a36Sopenharmony_ci	 * the header and (at the very least) partial parity
126362306a36Sopenharmony_ci	 * for one stripe. Round it down to ensure the data
126462306a36Sopenharmony_ci	 * space is cleanly divisible by stripe size.
126562306a36Sopenharmony_ci	 */
126662306a36Sopenharmony_ci	ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9);
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci	if (ppl_data_sectors > 0)
126962306a36Sopenharmony_ci		ppl_data_sectors = rounddown(ppl_data_sectors,
127062306a36Sopenharmony_ci				RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private));
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci	if (ppl_data_sectors <= 0) {
127362306a36Sopenharmony_ci		pr_warn("md/raid:%s: PPL space too small on %pg\n",
127462306a36Sopenharmony_ci			mdname(rdev->mddev), rdev->bdev);
127562306a36Sopenharmony_ci		return -ENOSPC;
127662306a36Sopenharmony_ci	}
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci	ppl_size_new = ppl_data_sectors + (PPL_HEADER_SIZE >> 9);
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	if ((rdev->ppl.sector < rdev->data_offset &&
128162306a36Sopenharmony_ci	     rdev->ppl.sector + ppl_size_new > rdev->data_offset) ||
128262306a36Sopenharmony_ci	    (rdev->ppl.sector >= rdev->data_offset &&
128362306a36Sopenharmony_ci	     rdev->data_offset + rdev->sectors > rdev->ppl.sector)) {
128462306a36Sopenharmony_ci		pr_warn("md/raid:%s: PPL space overlaps with data on %pg\n",
128562306a36Sopenharmony_ci			mdname(rdev->mddev), rdev->bdev);
128662306a36Sopenharmony_ci		return -EINVAL;
128762306a36Sopenharmony_ci	}
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	if (!rdev->mddev->external &&
129062306a36Sopenharmony_ci	    ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) ||
129162306a36Sopenharmony_ci	     (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) {
129262306a36Sopenharmony_ci		pr_warn("md/raid:%s: PPL space overlaps with superblock on %pg\n",
129362306a36Sopenharmony_ci			mdname(rdev->mddev), rdev->bdev);
129462306a36Sopenharmony_ci		return -EINVAL;
129562306a36Sopenharmony_ci	}
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	rdev->ppl.size = ppl_size_new;
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci	return 0;
130062306a36Sopenharmony_ci}
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_cistatic void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
130362306a36Sopenharmony_ci{
130462306a36Sopenharmony_ci	if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
130562306a36Sopenharmony_ci				      PPL_HEADER_SIZE) * 2) {
130662306a36Sopenharmony_ci		log->use_multippl = true;
130762306a36Sopenharmony_ci		set_bit(MD_HAS_MULTIPLE_PPLS,
130862306a36Sopenharmony_ci			&log->ppl_conf->mddev->flags);
130962306a36Sopenharmony_ci		log->entry_space = PPL_SPACE_SIZE;
131062306a36Sopenharmony_ci	} else {
131162306a36Sopenharmony_ci		log->use_multippl = false;
131262306a36Sopenharmony_ci		log->entry_space = (log->rdev->ppl.size << 9) -
131362306a36Sopenharmony_ci				   PPL_HEADER_SIZE;
131462306a36Sopenharmony_ci	}
131562306a36Sopenharmony_ci	log->next_io_sector = rdev->ppl.sector;
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	if (bdev_write_cache(rdev->bdev))
131862306a36Sopenharmony_ci		log->wb_cache_on = true;
131962306a36Sopenharmony_ci}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ciint ppl_init_log(struct r5conf *conf)
132262306a36Sopenharmony_ci{
132362306a36Sopenharmony_ci	struct ppl_conf *ppl_conf;
132462306a36Sopenharmony_ci	struct mddev *mddev = conf->mddev;
132562306a36Sopenharmony_ci	int ret = 0;
132662306a36Sopenharmony_ci	int max_disks;
132762306a36Sopenharmony_ci	int i;
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci	pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n",
133062306a36Sopenharmony_ci		 mdname(conf->mddev));
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci	if (PAGE_SIZE != 4096)
133362306a36Sopenharmony_ci		return -EINVAL;
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	if (mddev->level != 5) {
133662306a36Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with raid level %d\n",
133762306a36Sopenharmony_ci			mdname(mddev), mddev->level);
133862306a36Sopenharmony_ci		return -EINVAL;
133962306a36Sopenharmony_ci	}
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	if (mddev->bitmap_info.file || mddev->bitmap_info.offset) {
134262306a36Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with bitmap\n",
134362306a36Sopenharmony_ci			mdname(mddev));
134462306a36Sopenharmony_ci		return -EINVAL;
134562306a36Sopenharmony_ci	}
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
134862306a36Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with journal\n",
134962306a36Sopenharmony_ci			mdname(mddev));
135062306a36Sopenharmony_ci		return -EINVAL;
135162306a36Sopenharmony_ci	}
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) *
135462306a36Sopenharmony_ci		BITS_PER_BYTE;
135562306a36Sopenharmony_ci	if (conf->raid_disks > max_disks) {
135662306a36Sopenharmony_ci		pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
135762306a36Sopenharmony_ci			mdname(mddev), max_disks);
135862306a36Sopenharmony_ci		return -EINVAL;
135962306a36Sopenharmony_ci	}
136062306a36Sopenharmony_ci
136162306a36Sopenharmony_ci	ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL);
136262306a36Sopenharmony_ci	if (!ppl_conf)
136362306a36Sopenharmony_ci		return -ENOMEM;
136462306a36Sopenharmony_ci
136562306a36Sopenharmony_ci	ppl_conf->mddev = mddev;
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_ci	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
136862306a36Sopenharmony_ci	if (!ppl_conf->io_kc) {
136962306a36Sopenharmony_ci		ret = -ENOMEM;
137062306a36Sopenharmony_ci		goto err;
137162306a36Sopenharmony_ci	}
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci	ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc,
137462306a36Sopenharmony_ci			   ppl_io_pool_free, ppl_conf->io_kc);
137562306a36Sopenharmony_ci	if (ret)
137662306a36Sopenharmony_ci		goto err;
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci	ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS);
137962306a36Sopenharmony_ci	if (ret)
138062306a36Sopenharmony_ci		goto err;
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_ci	ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0);
138362306a36Sopenharmony_ci	if (ret)
138462306a36Sopenharmony_ci		goto err;
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_ci	ppl_conf->count = conf->raid_disks;
138762306a36Sopenharmony_ci	ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
138862306a36Sopenharmony_ci				       GFP_KERNEL);
138962306a36Sopenharmony_ci	if (!ppl_conf->child_logs) {
139062306a36Sopenharmony_ci		ret = -ENOMEM;
139162306a36Sopenharmony_ci		goto err;
139262306a36Sopenharmony_ci	}
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_ci	atomic64_set(&ppl_conf->seq, 0);
139562306a36Sopenharmony_ci	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
139662306a36Sopenharmony_ci	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci	if (!mddev->external) {
139962306a36Sopenharmony_ci		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
140062306a36Sopenharmony_ci		ppl_conf->block_size = 512;
140162306a36Sopenharmony_ci	} else {
140262306a36Sopenharmony_ci		ppl_conf->block_size = queue_logical_block_size(mddev->queue);
140362306a36Sopenharmony_ci	}
140462306a36Sopenharmony_ci
140562306a36Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
140662306a36Sopenharmony_ci		struct ppl_log *log = &ppl_conf->child_logs[i];
140762306a36Sopenharmony_ci		/* Array has not started so rcu dereference is safe */
140862306a36Sopenharmony_ci		struct md_rdev *rdev =
140962306a36Sopenharmony_ci			rcu_dereference_protected(conf->disks[i].rdev, 1);
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci		mutex_init(&log->io_mutex);
141262306a36Sopenharmony_ci		spin_lock_init(&log->io_list_lock);
141362306a36Sopenharmony_ci		INIT_LIST_HEAD(&log->io_list);
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ci		log->ppl_conf = ppl_conf;
141662306a36Sopenharmony_ci		log->rdev = rdev;
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci		if (rdev) {
141962306a36Sopenharmony_ci			ret = ppl_validate_rdev(rdev);
142062306a36Sopenharmony_ci			if (ret)
142162306a36Sopenharmony_ci				goto err;
142262306a36Sopenharmony_ci
142362306a36Sopenharmony_ci			ppl_init_child_log(log, rdev);
142462306a36Sopenharmony_ci		}
142562306a36Sopenharmony_ci	}
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci	/* load and possibly recover the logs from the member disks */
142862306a36Sopenharmony_ci	ret = ppl_load(ppl_conf);
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ci	if (ret) {
143162306a36Sopenharmony_ci		goto err;
143262306a36Sopenharmony_ci	} else if (!mddev->pers && mddev->recovery_cp == 0 &&
143362306a36Sopenharmony_ci		   ppl_conf->recovered_entries > 0 &&
143462306a36Sopenharmony_ci		   ppl_conf->mismatch_count == 0) {
143562306a36Sopenharmony_ci		/*
143662306a36Sopenharmony_ci		 * If we are starting a dirty array and the recovery succeeds
143762306a36Sopenharmony_ci		 * without any issues, set the array as clean.
143862306a36Sopenharmony_ci		 */
143962306a36Sopenharmony_ci		mddev->recovery_cp = MaxSector;
144062306a36Sopenharmony_ci		set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
144162306a36Sopenharmony_ci	} else if (mddev->pers && ppl_conf->mismatch_count > 0) {
144262306a36Sopenharmony_ci		/* no mismatch allowed when enabling PPL for a running array */
144362306a36Sopenharmony_ci		ret = -EINVAL;
144462306a36Sopenharmony_ci		goto err;
144562306a36Sopenharmony_ci	}
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	conf->log_private = ppl_conf;
144862306a36Sopenharmony_ci	set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	return 0;
145162306a36Sopenharmony_cierr:
145262306a36Sopenharmony_ci	__ppl_exit_log(ppl_conf);
145362306a36Sopenharmony_ci	return ret;
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ciint ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
145762306a36Sopenharmony_ci{
145862306a36Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
145962306a36Sopenharmony_ci	struct ppl_log *log;
146062306a36Sopenharmony_ci	int ret = 0;
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci	if (!rdev)
146362306a36Sopenharmony_ci		return -EINVAL;
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci	pr_debug("%s: disk: %d operation: %s dev: %pg\n",
146662306a36Sopenharmony_ci		 __func__, rdev->raid_disk, add ? "add" : "remove",
146762306a36Sopenharmony_ci		 rdev->bdev);
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_ci	if (rdev->raid_disk < 0)
147062306a36Sopenharmony_ci		return 0;
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_ci	if (rdev->raid_disk >= ppl_conf->count)
147362306a36Sopenharmony_ci		return -ENODEV;
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci	log = &ppl_conf->child_logs[rdev->raid_disk];
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci	mutex_lock(&log->io_mutex);
147862306a36Sopenharmony_ci	if (add) {
147962306a36Sopenharmony_ci		ret = ppl_validate_rdev(rdev);
148062306a36Sopenharmony_ci		if (!ret) {
148162306a36Sopenharmony_ci			log->rdev = rdev;
148262306a36Sopenharmony_ci			ret = ppl_write_empty_header(log);
148362306a36Sopenharmony_ci			ppl_init_child_log(log, rdev);
148462306a36Sopenharmony_ci		}
148562306a36Sopenharmony_ci	} else {
148662306a36Sopenharmony_ci		log->rdev = NULL;
148762306a36Sopenharmony_ci	}
148862306a36Sopenharmony_ci	mutex_unlock(&log->io_mutex);
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	return ret;
149162306a36Sopenharmony_ci}
149262306a36Sopenharmony_ci
/*
 * sysfs "show" handler for ppl_write_hint: always reports 0, regardless of
 * @mddev. Returns the number of characters written to @buf.
 */
static ssize_t
ppl_write_hint_show(struct mddev *mddev, char *buf)
{
	const int reported_hint = 0;

	return sprintf(buf, "%d\n", reported_hint);
}
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_cistatic ssize_t
150062306a36Sopenharmony_cippl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
150162306a36Sopenharmony_ci{
150262306a36Sopenharmony_ci	struct r5conf *conf;
150362306a36Sopenharmony_ci	int err = 0;
150462306a36Sopenharmony_ci	unsigned short new;
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_ci	if (len >= PAGE_SIZE)
150762306a36Sopenharmony_ci		return -EINVAL;
150862306a36Sopenharmony_ci	if (kstrtou16(page, 10, &new))
150962306a36Sopenharmony_ci		return -EINVAL;
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_ci	err = mddev_lock(mddev);
151262306a36Sopenharmony_ci	if (err)
151362306a36Sopenharmony_ci		return err;
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	conf = mddev->private;
151662306a36Sopenharmony_ci	if (!conf)
151762306a36Sopenharmony_ci		err = -ENODEV;
151862306a36Sopenharmony_ci	else if (!raid5_has_ppl(conf) || !conf->log_private)
151962306a36Sopenharmony_ci		err = -EINVAL;
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_ci	mddev_unlock(mddev);
152262306a36Sopenharmony_ci
152362306a36Sopenharmony_ci	return err ?: len;
152462306a36Sopenharmony_ci}
152562306a36Sopenharmony_ci
152662306a36Sopenharmony_cistruct md_sysfs_entry
152762306a36Sopenharmony_cippl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
152862306a36Sopenharmony_ci			ppl_write_hint_show,
152962306a36Sopenharmony_ci			ppl_write_hint_store);
1530