xref: /kernel/linux/linux-5.10/drivers/md/raid5-ppl.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Partial Parity Log for closing the RAID5 write hole
48c2ecf20Sopenharmony_ci * Copyright (c) 2017, Intel Corporation.
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include <linux/kernel.h>
88c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
98c2ecf20Sopenharmony_ci#include <linux/slab.h>
108c2ecf20Sopenharmony_ci#include <linux/crc32c.h>
118c2ecf20Sopenharmony_ci#include <linux/async_tx.h>
128c2ecf20Sopenharmony_ci#include <linux/raid/md_p.h>
138c2ecf20Sopenharmony_ci#include "md.h"
148c2ecf20Sopenharmony_ci#include "raid5.h"
158c2ecf20Sopenharmony_ci#include "raid5-log.h"
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci/*
188c2ecf20Sopenharmony_ci * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for
198c2ecf20Sopenharmony_ci * partial parity data. The header contains an array of entries
208c2ecf20Sopenharmony_ci * (struct ppl_header_entry) which describe the logged write requests.
218c2ecf20Sopenharmony_ci * Partial parity for the entries comes after the header, written in the same
228c2ecf20Sopenharmony_ci * sequence as the entries:
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci * Header
258c2ecf20Sopenharmony_ci *   entry0
268c2ecf20Sopenharmony_ci *   ...
278c2ecf20Sopenharmony_ci *   entryN
288c2ecf20Sopenharmony_ci * PP data
298c2ecf20Sopenharmony_ci *   PP for entry0
308c2ecf20Sopenharmony_ci *   ...
318c2ecf20Sopenharmony_ci *   PP for entryN
328c2ecf20Sopenharmony_ci *
338c2ecf20Sopenharmony_ci * An entry describes one or more consecutive stripe_heads, up to a full
 * stripe. The modified raid data chunks form an m-by-n matrix, where m is the
358c2ecf20Sopenharmony_ci * number of stripe_heads in the entry and n is the number of modified data
368c2ecf20Sopenharmony_ci * disks. Every stripe_head in the entry must write to the same data disks.
378c2ecf20Sopenharmony_ci * An example of a valid case described by a single entry (writes to the first
388c2ecf20Sopenharmony_ci * stripe of a 4 disk array, 16k chunk size):
398c2ecf20Sopenharmony_ci *
408c2ecf20Sopenharmony_ci * sh->sector   dd0   dd1   dd2    ppl
418c2ecf20Sopenharmony_ci *            +-----+-----+-----+
428c2ecf20Sopenharmony_ci * 0          | --- | --- | --- | +----+
438c2ecf20Sopenharmony_ci * 8          | -W- | -W- | --- | | pp |   data_sector = 8
448c2ecf20Sopenharmony_ci * 16         | -W- | -W- | --- | | pp |   data_size = 3 * 2 * 4k
458c2ecf20Sopenharmony_ci * 24         | -W- | -W- | --- | | pp |   pp_size = 3 * 4k
468c2ecf20Sopenharmony_ci *            +-----+-----+-----+ +----+
478c2ecf20Sopenharmony_ci *
488c2ecf20Sopenharmony_ci * data_sector is the first raid sector of the modified data, data_size is the
498c2ecf20Sopenharmony_ci * total size of modified data and pp_size is the size of partial parity for
508c2ecf20Sopenharmony_ci * this entry. Entries for full stripe writes contain no partial parity
518c2ecf20Sopenharmony_ci * (pp_size = 0), they only mark the stripes for which parity should be
528c2ecf20Sopenharmony_ci * recalculated after an unclean shutdown. Every entry holds a checksum of its
538c2ecf20Sopenharmony_ci * partial parity, the header also has a checksum of the header itself.
548c2ecf20Sopenharmony_ci *
558c2ecf20Sopenharmony_ci * A write request is always logged to the PPL instance stored on the parity
568c2ecf20Sopenharmony_ci * disk of the corresponding stripe. For each member disk there is one ppl_log
578c2ecf20Sopenharmony_ci * used to handle logging for this disk, independently from others. They are
588c2ecf20Sopenharmony_ci * grouped in child_logs array in struct ppl_conf, which is assigned to
598c2ecf20Sopenharmony_ci * r5conf->log_private.
608c2ecf20Sopenharmony_ci *
618c2ecf20Sopenharmony_ci * ppl_io_unit represents a full PPL write, header_page contains the ppl_header.
628c2ecf20Sopenharmony_ci * PPL entries for logged stripes are added in ppl_log_stripe(). A stripe_head
638c2ecf20Sopenharmony_ci * can be appended to the last entry if it meets the conditions for a valid
648c2ecf20Sopenharmony_ci * entry described above, otherwise a new entry is added. Checksums of entries
658c2ecf20Sopenharmony_ci * are calculated incrementally as stripes containing partial parity are being
668c2ecf20Sopenharmony_ci * added. ppl_submit_iounit() calculates the checksum of the header and submits
678c2ecf20Sopenharmony_ci * a bio containing the header page and partial parity pages (sh->ppl_page) for
688c2ecf20Sopenharmony_ci * all stripes of the io_unit. When the PPL write completes, the stripes
698c2ecf20Sopenharmony_ci * associated with the io_unit are released and raid5d starts writing their data
708c2ecf20Sopenharmony_ci * and parity. When all stripes are written, the io_unit is freed and the next
718c2ecf20Sopenharmony_ci * can be submitted.
728c2ecf20Sopenharmony_ci *
738c2ecf20Sopenharmony_ci * An io_unit is used to gather stripes until it is submitted or becomes full
748c2ecf20Sopenharmony_ci * (if the maximum number of entries or size of PPL is reached). Another io_unit
758c2ecf20Sopenharmony_ci * can't be submitted until the previous has completed (PPL and stripe
768c2ecf20Sopenharmony_ci * data+parity is written). The log->io_list tracks all io_units of a log
778c2ecf20Sopenharmony_ci * (for a single member disk). New io_units are added to the end of the list
788c2ecf20Sopenharmony_ci * and the first io_unit is submitted, if it is not submitted already.
798c2ecf20Sopenharmony_ci * The current io_unit accepting new stripes is always at the end of the list.
808c2ecf20Sopenharmony_ci *
818c2ecf20Sopenharmony_ci * If write-back cache is enabled for any of the disks in the array, its data
828c2ecf20Sopenharmony_ci * must be flushed before next io_unit is submitted.
838c2ecf20Sopenharmony_ci */
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci#define PPL_SPACE_SIZE (128 * 1024)
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistruct ppl_conf {
888c2ecf20Sopenharmony_ci	struct mddev *mddev;
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	/* array of child logs, one for each raid disk */
918c2ecf20Sopenharmony_ci	struct ppl_log *child_logs;
928c2ecf20Sopenharmony_ci	int count;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	int block_size;		/* the logical block size used for data_sector
958c2ecf20Sopenharmony_ci				 * in ppl_header_entry */
968c2ecf20Sopenharmony_ci	u32 signature;		/* raid array identifier */
978c2ecf20Sopenharmony_ci	atomic64_t seq;		/* current log write sequence number */
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	struct kmem_cache *io_kc;
1008c2ecf20Sopenharmony_ci	mempool_t io_pool;
1018c2ecf20Sopenharmony_ci	struct bio_set bs;
1028c2ecf20Sopenharmony_ci	struct bio_set flush_bs;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	/* used only for recovery */
1058c2ecf20Sopenharmony_ci	int recovered_entries;
1068c2ecf20Sopenharmony_ci	int mismatch_count;
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	/* stripes to retry if failed to allocate io_unit */
1098c2ecf20Sopenharmony_ci	struct list_head no_mem_stripes;
1108c2ecf20Sopenharmony_ci	spinlock_t no_mem_stripes_lock;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	unsigned short write_hint;
1138c2ecf20Sopenharmony_ci};
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_cistruct ppl_log {
1168c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf;	/* shared between all log instances */
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	struct md_rdev *rdev;		/* array member disk associated with
1198c2ecf20Sopenharmony_ci					 * this log instance */
1208c2ecf20Sopenharmony_ci	struct mutex io_mutex;
1218c2ecf20Sopenharmony_ci	struct ppl_io_unit *current_io;	/* current io_unit accepting new data
1228c2ecf20Sopenharmony_ci					 * always at the end of io_list */
1238c2ecf20Sopenharmony_ci	spinlock_t io_list_lock;
1248c2ecf20Sopenharmony_ci	struct list_head io_list;	/* all io_units of this log */
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	sector_t next_io_sector;
1278c2ecf20Sopenharmony_ci	unsigned int entry_space;
1288c2ecf20Sopenharmony_ci	bool use_multippl;
1298c2ecf20Sopenharmony_ci	bool wb_cache_on;
1308c2ecf20Sopenharmony_ci	unsigned long disk_flush_bitmap;
1318c2ecf20Sopenharmony_ci};
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci#define PPL_IO_INLINE_BVECS 32
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_cistruct ppl_io_unit {
1368c2ecf20Sopenharmony_ci	struct ppl_log *log;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	struct page *header_page;	/* for ppl_header */
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	unsigned int entries_count;	/* number of entries in ppl_header */
1418c2ecf20Sopenharmony_ci	unsigned int pp_size;		/* total size current of partial parity */
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	u64 seq;			/* sequence number of this log write */
1448c2ecf20Sopenharmony_ci	struct list_head log_sibling;	/* log->io_list */
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	struct list_head stripe_list;	/* stripes added to the io_unit */
1478c2ecf20Sopenharmony_ci	atomic_t pending_stripes;	/* how many stripes not written to raid */
1488c2ecf20Sopenharmony_ci	atomic_t pending_flushes;	/* how many disk flushes are in progress */
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	bool submitted;			/* true if write to log started */
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	/* inline bio and its biovec for submitting the iounit */
1538c2ecf20Sopenharmony_ci	struct bio bio;
1548c2ecf20Sopenharmony_ci	struct bio_vec biovec[PPL_IO_INLINE_BVECS];
1558c2ecf20Sopenharmony_ci};
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistruct dma_async_tx_descriptor *
1588c2ecf20Sopenharmony_ciops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
1598c2ecf20Sopenharmony_ci		       struct dma_async_tx_descriptor *tx)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	int disks = sh->disks;
1628c2ecf20Sopenharmony_ci	struct page **srcs = percpu->scribble;
1638c2ecf20Sopenharmony_ci	int count = 0, pd_idx = sh->pd_idx, i;
1648c2ecf20Sopenharmony_ci	struct async_submit_ctl submit;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	/*
1698c2ecf20Sopenharmony_ci	 * Partial parity is the XOR of stripe data chunks that are not changed
1708c2ecf20Sopenharmony_ci	 * during the write request. Depending on available data
1718c2ecf20Sopenharmony_ci	 * (read-modify-write vs. reconstruct-write case) we calculate it
1728c2ecf20Sopenharmony_ci	 * differently.
1738c2ecf20Sopenharmony_ci	 */
1748c2ecf20Sopenharmony_ci	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
1758c2ecf20Sopenharmony_ci		/*
1768c2ecf20Sopenharmony_ci		 * rmw: xor old data and parity from updated disks
1778c2ecf20Sopenharmony_ci		 * This is calculated earlier by ops_run_prexor5() so just copy
1788c2ecf20Sopenharmony_ci		 * the parity dev page.
1798c2ecf20Sopenharmony_ci		 */
1808c2ecf20Sopenharmony_ci		srcs[count++] = sh->dev[pd_idx].page;
1818c2ecf20Sopenharmony_ci	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
1828c2ecf20Sopenharmony_ci		/* rcw: xor data from all not updated disks */
1838c2ecf20Sopenharmony_ci		for (i = disks; i--;) {
1848c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
1858c2ecf20Sopenharmony_ci			if (test_bit(R5_UPTODATE, &dev->flags))
1868c2ecf20Sopenharmony_ci				srcs[count++] = dev->page;
1878c2ecf20Sopenharmony_ci		}
1888c2ecf20Sopenharmony_ci	} else {
1898c2ecf20Sopenharmony_ci		return tx;
1908c2ecf20Sopenharmony_ci	}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
1938c2ecf20Sopenharmony_ci			  NULL, sh, (void *) (srcs + sh->disks + 2));
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	if (count == 1)
1968c2ecf20Sopenharmony_ci		tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
1978c2ecf20Sopenharmony_ci				  &submit);
1988c2ecf20Sopenharmony_ci	else
1998c2ecf20Sopenharmony_ci		tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE,
2008c2ecf20Sopenharmony_ci			       &submit);
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	return tx;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_cistatic void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
2068c2ecf20Sopenharmony_ci{
2078c2ecf20Sopenharmony_ci	struct kmem_cache *kc = pool_data;
2088c2ecf20Sopenharmony_ci	struct ppl_io_unit *io;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	io = kmem_cache_alloc(kc, gfp_mask);
2118c2ecf20Sopenharmony_ci	if (!io)
2128c2ecf20Sopenharmony_ci		return NULL;
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	io->header_page = alloc_page(gfp_mask);
2158c2ecf20Sopenharmony_ci	if (!io->header_page) {
2168c2ecf20Sopenharmony_ci		kmem_cache_free(kc, io);
2178c2ecf20Sopenharmony_ci		return NULL;
2188c2ecf20Sopenharmony_ci	}
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	return io;
2218c2ecf20Sopenharmony_ci}
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_cistatic void ppl_io_pool_free(void *element, void *pool_data)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	struct kmem_cache *kc = pool_data;
2268c2ecf20Sopenharmony_ci	struct ppl_io_unit *io = element;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	__free_page(io->header_page);
2298c2ecf20Sopenharmony_ci	kmem_cache_free(kc, io);
2308c2ecf20Sopenharmony_ci}
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_cistatic struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
2338c2ecf20Sopenharmony_ci					  struct stripe_head *sh)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
2368c2ecf20Sopenharmony_ci	struct ppl_io_unit *io;
2378c2ecf20Sopenharmony_ci	struct ppl_header *pplhdr;
2388c2ecf20Sopenharmony_ci	struct page *header_page;
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT);
2418c2ecf20Sopenharmony_ci	if (!io)
2428c2ecf20Sopenharmony_ci		return NULL;
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	header_page = io->header_page;
2458c2ecf20Sopenharmony_ci	memset(io, 0, sizeof(*io));
2468c2ecf20Sopenharmony_ci	io->header_page = header_page;
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	io->log = log;
2498c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&io->log_sibling);
2508c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&io->stripe_list);
2518c2ecf20Sopenharmony_ci	atomic_set(&io->pending_stripes, 0);
2528c2ecf20Sopenharmony_ci	atomic_set(&io->pending_flushes, 0);
2538c2ecf20Sopenharmony_ci	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	pplhdr = page_address(io->header_page);
2568c2ecf20Sopenharmony_ci	clear_page(pplhdr);
2578c2ecf20Sopenharmony_ci	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
2588c2ecf20Sopenharmony_ci	pplhdr->signature = cpu_to_le32(ppl_conf->signature);
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	io->seq = atomic64_add_return(1, &ppl_conf->seq);
2618c2ecf20Sopenharmony_ci	pplhdr->generation = cpu_to_le64(io->seq);
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	return io;
2648c2ecf20Sopenharmony_ci}
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_cistatic int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
2678c2ecf20Sopenharmony_ci{
2688c2ecf20Sopenharmony_ci	struct ppl_io_unit *io = log->current_io;
2698c2ecf20Sopenharmony_ci	struct ppl_header_entry *e = NULL;
2708c2ecf20Sopenharmony_ci	struct ppl_header *pplhdr;
2718c2ecf20Sopenharmony_ci	int i;
2728c2ecf20Sopenharmony_ci	sector_t data_sector = 0;
2738c2ecf20Sopenharmony_ci	int data_disks = 0;
2748c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	/* check if current io_unit is full */
2798c2ecf20Sopenharmony_ci	if (io && (io->pp_size == log->entry_space ||
2808c2ecf20Sopenharmony_ci		   io->entries_count == PPL_HDR_MAX_ENTRIES)) {
2818c2ecf20Sopenharmony_ci		pr_debug("%s: add io_unit blocked by seq: %llu\n",
2828c2ecf20Sopenharmony_ci			 __func__, io->seq);
2838c2ecf20Sopenharmony_ci		io = NULL;
2848c2ecf20Sopenharmony_ci	}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	/* add a new unit if there is none or the current is full */
2878c2ecf20Sopenharmony_ci	if (!io) {
2888c2ecf20Sopenharmony_ci		io = ppl_new_iounit(log, sh);
2898c2ecf20Sopenharmony_ci		if (!io)
2908c2ecf20Sopenharmony_ci			return -ENOMEM;
2918c2ecf20Sopenharmony_ci		spin_lock_irq(&log->io_list_lock);
2928c2ecf20Sopenharmony_ci		list_add_tail(&io->log_sibling, &log->io_list);
2938c2ecf20Sopenharmony_ci		spin_unlock_irq(&log->io_list_lock);
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci		log->current_io = io;
2968c2ecf20Sopenharmony_ci	}
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	for (i = 0; i < sh->disks; i++) {
2998c2ecf20Sopenharmony_ci		struct r5dev *dev = &sh->dev[i];
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci		if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) {
3028c2ecf20Sopenharmony_ci			if (!data_disks || dev->sector < data_sector)
3038c2ecf20Sopenharmony_ci				data_sector = dev->sector;
3048c2ecf20Sopenharmony_ci			data_disks++;
3058c2ecf20Sopenharmony_ci		}
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci	BUG_ON(!data_disks);
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	pr_debug("%s: seq: %llu data_sector: %llu data_disks: %d\n", __func__,
3108c2ecf20Sopenharmony_ci		 io->seq, (unsigned long long)data_sector, data_disks);
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	pplhdr = page_address(io->header_page);
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci	if (io->entries_count > 0) {
3158c2ecf20Sopenharmony_ci		struct ppl_header_entry *last =
3168c2ecf20Sopenharmony_ci				&pplhdr->entries[io->entries_count - 1];
3178c2ecf20Sopenharmony_ci		struct stripe_head *sh_last = list_last_entry(
3188c2ecf20Sopenharmony_ci				&io->stripe_list, struct stripe_head, log_list);
3198c2ecf20Sopenharmony_ci		u64 data_sector_last = le64_to_cpu(last->data_sector);
3208c2ecf20Sopenharmony_ci		u32 data_size_last = le32_to_cpu(last->data_size);
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci		/*
3238c2ecf20Sopenharmony_ci		 * Check if we can append the stripe to the last entry. It must
3248c2ecf20Sopenharmony_ci		 * be just after the last logged stripe and write to the same
3258c2ecf20Sopenharmony_ci		 * disks. Use bit shift and logarithm to avoid 64-bit division.
3268c2ecf20Sopenharmony_ci		 */
3278c2ecf20Sopenharmony_ci		if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) &&
3288c2ecf20Sopenharmony_ci		    (data_sector >> ilog2(conf->chunk_sectors) ==
3298c2ecf20Sopenharmony_ci		     data_sector_last >> ilog2(conf->chunk_sectors)) &&
3308c2ecf20Sopenharmony_ci		    ((data_sector - data_sector_last) * data_disks ==
3318c2ecf20Sopenharmony_ci		     data_size_last >> 9))
3328c2ecf20Sopenharmony_ci			e = last;
3338c2ecf20Sopenharmony_ci	}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	if (!e) {
3368c2ecf20Sopenharmony_ci		e = &pplhdr->entries[io->entries_count++];
3378c2ecf20Sopenharmony_ci		e->data_sector = cpu_to_le64(data_sector);
3388c2ecf20Sopenharmony_ci		e->parity_disk = cpu_to_le32(sh->pd_idx);
3398c2ecf20Sopenharmony_ci		e->checksum = cpu_to_le32(~0);
3408c2ecf20Sopenharmony_ci	}
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci	le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT);
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	/* don't write any PP if full stripe write */
3458c2ecf20Sopenharmony_ci	if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
3468c2ecf20Sopenharmony_ci		le32_add_cpu(&e->pp_size, PAGE_SIZE);
3478c2ecf20Sopenharmony_ci		io->pp_size += PAGE_SIZE;
3488c2ecf20Sopenharmony_ci		e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
3498c2ecf20Sopenharmony_ci						    page_address(sh->ppl_page),
3508c2ecf20Sopenharmony_ci						    PAGE_SIZE));
3518c2ecf20Sopenharmony_ci	}
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	list_add_tail(&sh->log_list, &io->stripe_list);
3548c2ecf20Sopenharmony_ci	atomic_inc(&io->pending_stripes);
3558c2ecf20Sopenharmony_ci	sh->ppl_io = io;
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci	return 0;
3588c2ecf20Sopenharmony_ci}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ciint ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
3618c2ecf20Sopenharmony_ci{
3628c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
3638c2ecf20Sopenharmony_ci	struct ppl_io_unit *io = sh->ppl_io;
3648c2ecf20Sopenharmony_ci	struct ppl_log *log;
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
3678c2ecf20Sopenharmony_ci	    !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
3688c2ecf20Sopenharmony_ci	    !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
3698c2ecf20Sopenharmony_ci		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
3708c2ecf20Sopenharmony_ci		return -EAGAIN;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	log = &ppl_conf->child_logs[sh->pd_idx];
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	mutex_lock(&log->io_mutex);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
3788c2ecf20Sopenharmony_ci		mutex_unlock(&log->io_mutex);
3798c2ecf20Sopenharmony_ci		return -EAGAIN;
3808c2ecf20Sopenharmony_ci	}
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	set_bit(STRIPE_LOG_TRAPPED, &sh->state);
3838c2ecf20Sopenharmony_ci	clear_bit(STRIPE_DELAYED, &sh->state);
3848c2ecf20Sopenharmony_ci	atomic_inc(&sh->count);
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci	if (ppl_log_stripe(log, sh)) {
3878c2ecf20Sopenharmony_ci		spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
3888c2ecf20Sopenharmony_ci		list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes);
3898c2ecf20Sopenharmony_ci		spin_unlock_irq(&ppl_conf->no_mem_stripes_lock);
3908c2ecf20Sopenharmony_ci	}
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	mutex_unlock(&log->io_mutex);
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	return 0;
3958c2ecf20Sopenharmony_ci}
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_cistatic void ppl_log_endio(struct bio *bio)
3988c2ecf20Sopenharmony_ci{
3998c2ecf20Sopenharmony_ci	struct ppl_io_unit *io = bio->bi_private;
4008c2ecf20Sopenharmony_ci	struct ppl_log *log = io->log;
4018c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
4028c2ecf20Sopenharmony_ci	struct stripe_head *sh, *next;
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	pr_debug("%s: seq: %llu\n", __func__, io->seq);
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci	if (bio->bi_status)
4078c2ecf20Sopenharmony_ci		md_error(ppl_conf->mddev, log->rdev);
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
4108c2ecf20Sopenharmony_ci		list_del_init(&sh->log_list);
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
4138c2ecf20Sopenharmony_ci		raid5_release_stripe(sh);
4148c2ecf20Sopenharmony_ci	}
4158c2ecf20Sopenharmony_ci}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_cistatic void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
4188c2ecf20Sopenharmony_ci{
4198c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n",
4228c2ecf20Sopenharmony_ci		 __func__, io->seq, bio->bi_iter.bi_size,
4238c2ecf20Sopenharmony_ci		 (unsigned long long)bio->bi_iter.bi_sector,
4248c2ecf20Sopenharmony_ci		 bio_devname(bio, b));
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	submit_bio(bio);
4278c2ecf20Sopenharmony_ci}
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_cistatic void ppl_submit_iounit(struct ppl_io_unit *io)
4308c2ecf20Sopenharmony_ci{
4318c2ecf20Sopenharmony_ci	struct ppl_log *log = io->log;
4328c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
4338c2ecf20Sopenharmony_ci	struct ppl_header *pplhdr = page_address(io->header_page);
4348c2ecf20Sopenharmony_ci	struct bio *bio = &io->bio;
4358c2ecf20Sopenharmony_ci	struct stripe_head *sh;
4368c2ecf20Sopenharmony_ci	int i;
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	bio->bi_private = io;
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_ci	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
4418c2ecf20Sopenharmony_ci		ppl_log_endio(bio);
4428c2ecf20Sopenharmony_ci		return;
4438c2ecf20Sopenharmony_ci	}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci	for (i = 0; i < io->entries_count; i++) {
4468c2ecf20Sopenharmony_ci		struct ppl_header_entry *e = &pplhdr->entries[i];
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci		pr_debug("%s: seq: %llu entry: %d data_sector: %llu pp_size: %u data_size: %u\n",
4498c2ecf20Sopenharmony_ci			 __func__, io->seq, i, le64_to_cpu(e->data_sector),
4508c2ecf20Sopenharmony_ci			 le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size));
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci		e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >>
4538c2ecf20Sopenharmony_ci					     ilog2(ppl_conf->block_size >> 9));
4548c2ecf20Sopenharmony_ci		e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum));
4558c2ecf20Sopenharmony_ci	}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	pplhdr->entries_count = cpu_to_le32(io->entries_count);
4588c2ecf20Sopenharmony_ci	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	/* Rewind the buffer if current PPL is larger then remaining space */
4618c2ecf20Sopenharmony_ci	if (log->use_multippl &&
4628c2ecf20Sopenharmony_ci	    log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
4638c2ecf20Sopenharmony_ci	    (PPL_HEADER_SIZE + io->pp_size) >> 9)
4648c2ecf20Sopenharmony_ci		log->next_io_sector = log->rdev->ppl.sector;
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	bio->bi_end_io = ppl_log_endio;
4688c2ecf20Sopenharmony_ci	bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
4698c2ecf20Sopenharmony_ci	bio_set_dev(bio, log->rdev->bdev);
4708c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = log->next_io_sector;
4718c2ecf20Sopenharmony_ci	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
4728c2ecf20Sopenharmony_ci	bio->bi_write_hint = ppl_conf->write_hint;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	pr_debug("%s: log->current_io_sector: %llu\n", __func__,
4758c2ecf20Sopenharmony_ci	    (unsigned long long)log->next_io_sector);
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	if (log->use_multippl)
4788c2ecf20Sopenharmony_ci		log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	WARN_ON(log->disk_flush_bitmap != 0);
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	list_for_each_entry(sh, &io->stripe_list, log_list) {
4838c2ecf20Sopenharmony_ci		for (i = 0; i < sh->disks; i++) {
4848c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci			if ((ppl_conf->child_logs[i].wb_cache_on) &&
4878c2ecf20Sopenharmony_ci			    (test_bit(R5_Wantwrite, &dev->flags))) {
4888c2ecf20Sopenharmony_ci				set_bit(i, &log->disk_flush_bitmap);
4898c2ecf20Sopenharmony_ci			}
4908c2ecf20Sopenharmony_ci		}
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci		/* entries for full stripe writes have no partial parity */
4938c2ecf20Sopenharmony_ci		if (test_bit(STRIPE_FULL_WRITE, &sh->state))
4948c2ecf20Sopenharmony_ci			continue;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci		if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
4978c2ecf20Sopenharmony_ci			struct bio *prev = bio;
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci			bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
5008c2ecf20Sopenharmony_ci					       &ppl_conf->bs);
5018c2ecf20Sopenharmony_ci			bio->bi_opf = prev->bi_opf;
5028c2ecf20Sopenharmony_ci			bio->bi_write_hint = prev->bi_write_hint;
5038c2ecf20Sopenharmony_ci			bio_copy_dev(bio, prev);
5048c2ecf20Sopenharmony_ci			bio->bi_iter.bi_sector = bio_end_sector(prev);
5058c2ecf20Sopenharmony_ci			bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci			bio_chain(bio, prev);
5088c2ecf20Sopenharmony_ci			ppl_submit_iounit_bio(io, prev);
5098c2ecf20Sopenharmony_ci		}
5108c2ecf20Sopenharmony_ci	}
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	ppl_submit_iounit_bio(io, bio);
5138c2ecf20Sopenharmony_ci}
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_cistatic void ppl_submit_current_io(struct ppl_log *log)
5168c2ecf20Sopenharmony_ci{
5178c2ecf20Sopenharmony_ci	struct ppl_io_unit *io;
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci	spin_lock_irq(&log->io_list_lock);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
5228c2ecf20Sopenharmony_ci				      log_sibling);
5238c2ecf20Sopenharmony_ci	if (io && io->submitted)
5248c2ecf20Sopenharmony_ci		io = NULL;
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci	spin_unlock_irq(&log->io_list_lock);
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	if (io) {
5298c2ecf20Sopenharmony_ci		io->submitted = true;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci		if (io == log->current_io)
5328c2ecf20Sopenharmony_ci			log->current_io = NULL;
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci		ppl_submit_iounit(io);
5358c2ecf20Sopenharmony_ci	}
5368c2ecf20Sopenharmony_ci}
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_civoid ppl_write_stripe_run(struct r5conf *conf)
5398c2ecf20Sopenharmony_ci{
5408c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
5418c2ecf20Sopenharmony_ci	struct ppl_log *log;
5428c2ecf20Sopenharmony_ci	int i;
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
5458c2ecf20Sopenharmony_ci		log = &ppl_conf->child_logs[i];
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci		mutex_lock(&log->io_mutex);
5488c2ecf20Sopenharmony_ci		ppl_submit_current_io(log);
5498c2ecf20Sopenharmony_ci		mutex_unlock(&log->io_mutex);
5508c2ecf20Sopenharmony_ci	}
5518c2ecf20Sopenharmony_ci}
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_cistatic void ppl_io_unit_finished(struct ppl_io_unit *io)
5548c2ecf20Sopenharmony_ci{
5558c2ecf20Sopenharmony_ci	struct ppl_log *log = io->log;
5568c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
5578c2ecf20Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
5588c2ecf20Sopenharmony_ci	unsigned long flags;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	pr_debug("%s: seq: %llu\n", __func__, io->seq);
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	local_irq_save(flags);
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	spin_lock(&log->io_list_lock);
5658c2ecf20Sopenharmony_ci	list_del(&io->log_sibling);
5668c2ecf20Sopenharmony_ci	spin_unlock(&log->io_list_lock);
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	mempool_free(io, &ppl_conf->io_pool);
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	spin_lock(&ppl_conf->no_mem_stripes_lock);
5718c2ecf20Sopenharmony_ci	if (!list_empty(&ppl_conf->no_mem_stripes)) {
5728c2ecf20Sopenharmony_ci		struct stripe_head *sh;
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci		sh = list_first_entry(&ppl_conf->no_mem_stripes,
5758c2ecf20Sopenharmony_ci				      struct stripe_head, log_list);
5768c2ecf20Sopenharmony_ci		list_del_init(&sh->log_list);
5778c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
5788c2ecf20Sopenharmony_ci		raid5_release_stripe(sh);
5798c2ecf20Sopenharmony_ci	}
5808c2ecf20Sopenharmony_ci	spin_unlock(&ppl_conf->no_mem_stripes_lock);
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	local_irq_restore(flags);
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	wake_up(&conf->wait_for_quiescent);
5858c2ecf20Sopenharmony_ci}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_cistatic void ppl_flush_endio(struct bio *bio)
5888c2ecf20Sopenharmony_ci{
5898c2ecf20Sopenharmony_ci	struct ppl_io_unit *io = bio->bi_private;
5908c2ecf20Sopenharmony_ci	struct ppl_log *log = io->log;
5918c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
5928c2ecf20Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
5938c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ci	pr_debug("%s: dev: %s\n", __func__, bio_devname(bio, b));
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_ci	if (bio->bi_status) {
5988c2ecf20Sopenharmony_ci		struct md_rdev *rdev;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci		rcu_read_lock();
6018c2ecf20Sopenharmony_ci		rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio));
6028c2ecf20Sopenharmony_ci		if (rdev)
6038c2ecf20Sopenharmony_ci			md_error(rdev->mddev, rdev);
6048c2ecf20Sopenharmony_ci		rcu_read_unlock();
6058c2ecf20Sopenharmony_ci	}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	bio_put(bio);
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&io->pending_flushes)) {
6108c2ecf20Sopenharmony_ci		ppl_io_unit_finished(io);
6118c2ecf20Sopenharmony_ci		md_wakeup_thread(conf->mddev->thread);
6128c2ecf20Sopenharmony_ci	}
6138c2ecf20Sopenharmony_ci}
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_cistatic void ppl_do_flush(struct ppl_io_unit *io)
6168c2ecf20Sopenharmony_ci{
6178c2ecf20Sopenharmony_ci	struct ppl_log *log = io->log;
6188c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
6198c2ecf20Sopenharmony_ci	struct r5conf *conf = ppl_conf->mddev->private;
6208c2ecf20Sopenharmony_ci	int raid_disks = conf->raid_disks;
6218c2ecf20Sopenharmony_ci	int flushed_disks = 0;
6228c2ecf20Sopenharmony_ci	int i;
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	atomic_set(&io->pending_flushes, raid_disks);
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) {
6278c2ecf20Sopenharmony_ci		struct md_rdev *rdev;
6288c2ecf20Sopenharmony_ci		struct block_device *bdev = NULL;
6298c2ecf20Sopenharmony_ci
6308c2ecf20Sopenharmony_ci		rcu_read_lock();
6318c2ecf20Sopenharmony_ci		rdev = rcu_dereference(conf->disks[i].rdev);
6328c2ecf20Sopenharmony_ci		if (rdev && !test_bit(Faulty, &rdev->flags))
6338c2ecf20Sopenharmony_ci			bdev = rdev->bdev;
6348c2ecf20Sopenharmony_ci		rcu_read_unlock();
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_ci		if (bdev) {
6378c2ecf20Sopenharmony_ci			struct bio *bio;
6388c2ecf20Sopenharmony_ci			char b[BDEVNAME_SIZE];
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci			bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs);
6418c2ecf20Sopenharmony_ci			bio_set_dev(bio, bdev);
6428c2ecf20Sopenharmony_ci			bio->bi_private = io;
6438c2ecf20Sopenharmony_ci			bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
6448c2ecf20Sopenharmony_ci			bio->bi_end_io = ppl_flush_endio;
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci			pr_debug("%s: dev: %s\n", __func__,
6478c2ecf20Sopenharmony_ci				 bio_devname(bio, b));
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci			submit_bio(bio);
6508c2ecf20Sopenharmony_ci			flushed_disks++;
6518c2ecf20Sopenharmony_ci		}
6528c2ecf20Sopenharmony_ci	}
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	log->disk_flush_bitmap = 0;
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	for (i = flushed_disks ; i < raid_disks; i++) {
6578c2ecf20Sopenharmony_ci		if (atomic_dec_and_test(&io->pending_flushes))
6588c2ecf20Sopenharmony_ci			ppl_io_unit_finished(io);
6598c2ecf20Sopenharmony_ci	}
6608c2ecf20Sopenharmony_ci}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_cistatic inline bool ppl_no_io_unit_submitted(struct r5conf *conf,
6638c2ecf20Sopenharmony_ci					    struct ppl_log *log)
6648c2ecf20Sopenharmony_ci{
6658c2ecf20Sopenharmony_ci	struct ppl_io_unit *io;
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
6688c2ecf20Sopenharmony_ci				      log_sibling);
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	return !io || !io->submitted;
6718c2ecf20Sopenharmony_ci}
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_civoid ppl_quiesce(struct r5conf *conf, int quiesce)
6748c2ecf20Sopenharmony_ci{
6758c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
6768c2ecf20Sopenharmony_ci	int i;
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	if (quiesce) {
6798c2ecf20Sopenharmony_ci		for (i = 0; i < ppl_conf->count; i++) {
6808c2ecf20Sopenharmony_ci			struct ppl_log *log = &ppl_conf->child_logs[i];
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci			spin_lock_irq(&log->io_list_lock);
6838c2ecf20Sopenharmony_ci			wait_event_lock_irq(conf->wait_for_quiescent,
6848c2ecf20Sopenharmony_ci					    ppl_no_io_unit_submitted(conf, log),
6858c2ecf20Sopenharmony_ci					    log->io_list_lock);
6868c2ecf20Sopenharmony_ci			spin_unlock_irq(&log->io_list_lock);
6878c2ecf20Sopenharmony_ci		}
6888c2ecf20Sopenharmony_ci	}
6898c2ecf20Sopenharmony_ci}
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ciint ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
6928c2ecf20Sopenharmony_ci{
6938c2ecf20Sopenharmony_ci	if (bio->bi_iter.bi_size == 0) {
6948c2ecf20Sopenharmony_ci		bio_endio(bio);
6958c2ecf20Sopenharmony_ci		return 0;
6968c2ecf20Sopenharmony_ci	}
6978c2ecf20Sopenharmony_ci	bio->bi_opf &= ~REQ_PREFLUSH;
6988c2ecf20Sopenharmony_ci	return -EAGAIN;
6998c2ecf20Sopenharmony_ci}
7008c2ecf20Sopenharmony_ci
7018c2ecf20Sopenharmony_civoid ppl_stripe_write_finished(struct stripe_head *sh)
7028c2ecf20Sopenharmony_ci{
7038c2ecf20Sopenharmony_ci	struct ppl_io_unit *io;
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci	io = sh->ppl_io;
7068c2ecf20Sopenharmony_ci	sh->ppl_io = NULL;
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci	if (io && atomic_dec_and_test(&io->pending_stripes)) {
7098c2ecf20Sopenharmony_ci		if (io->log->disk_flush_bitmap)
7108c2ecf20Sopenharmony_ci			ppl_do_flush(io);
7118c2ecf20Sopenharmony_ci		else
7128c2ecf20Sopenharmony_ci			ppl_io_unit_finished(io);
7138c2ecf20Sopenharmony_ci	}
7148c2ecf20Sopenharmony_ci}
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_cistatic void ppl_xor(int size, struct page *page1, struct page *page2)
7178c2ecf20Sopenharmony_ci{
7188c2ecf20Sopenharmony_ci	struct async_submit_ctl submit;
7198c2ecf20Sopenharmony_ci	struct dma_async_tx_descriptor *tx;
7208c2ecf20Sopenharmony_ci	struct page *xor_srcs[] = { page1, page2 };
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	init_async_submit(&submit, ASYNC_TX_ACK|ASYNC_TX_XOR_DROP_DST,
7238c2ecf20Sopenharmony_ci			  NULL, NULL, NULL, NULL);
7248c2ecf20Sopenharmony_ci	tx = async_xor(page1, xor_srcs, 0, 2, size, &submit);
7258c2ecf20Sopenharmony_ci
7268c2ecf20Sopenharmony_ci	async_tx_quiesce(&tx);
7278c2ecf20Sopenharmony_ci}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci/*
7308c2ecf20Sopenharmony_ci * PPL recovery strategy: xor partial parity and data from all modified data
7318c2ecf20Sopenharmony_ci * disks within a stripe and write the result as the new stripe parity. If all
7328c2ecf20Sopenharmony_ci * stripe data disks are modified (full stripe write), no partial parity is
7338c2ecf20Sopenharmony_ci * available, so just xor the data disks.
7348c2ecf20Sopenharmony_ci *
7358c2ecf20Sopenharmony_ci * Recovery of a PPL entry shall occur only if all modified data disks are
7368c2ecf20Sopenharmony_ci * available and read from all of them succeeds.
7378c2ecf20Sopenharmony_ci *
 * A PPL entry applies to a stripe; the partial parity size for an entry is at
 * most the size of the chunk. Examples of possible cases for a single entry:
7408c2ecf20Sopenharmony_ci *
7418c2ecf20Sopenharmony_ci * case 0: single data disk write:
7428c2ecf20Sopenharmony_ci *   data0    data1    data2     ppl        parity
7438c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7448c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
7458c2ecf20Sopenharmony_ci * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
7468c2ecf20Sopenharmony_ci * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
7478c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
7488c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7498c2ecf20Sopenharmony_ci * pp_size = data_size
7508c2ecf20Sopenharmony_ci *
7518c2ecf20Sopenharmony_ci * case 1: more than one data disk write:
7528c2ecf20Sopenharmony_ci *   data0    data1    data2     ppl        parity
7538c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7548c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
7558c2ecf20Sopenharmony_ci * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
7568c2ecf20Sopenharmony_ci * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
7578c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | +----+    | (no change)        |
7588c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7598c2ecf20Sopenharmony_ci * pp_size = data_size / modified_data_disks
7608c2ecf20Sopenharmony_ci *
7618c2ecf20Sopenharmony_ci * case 2: write to all data disks (also full stripe write):
7628c2ecf20Sopenharmony_ci *   data0    data1    data2                parity
7638c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7648c2ecf20Sopenharmony_ci * | ------ | ------ | ------ |           | (no change)        |
7658c2ecf20Sopenharmony_ci * | -data- | -data- | -data- | --------> | xor all data       |
7668c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | --------> | (no change)        |
7678c2ecf20Sopenharmony_ci * | ------ | ------ | ------ |           | (no change)        |
7688c2ecf20Sopenharmony_ci * +--------+--------+--------+           +--------------------+
7698c2ecf20Sopenharmony_ci * pp_size = 0
7708c2ecf20Sopenharmony_ci *
7718c2ecf20Sopenharmony_ci * The following cases are possible only in other implementations. The recovery
7728c2ecf20Sopenharmony_ci * code can handle them, but they are not generated at runtime because they can
7738c2ecf20Sopenharmony_ci * be reduced to cases 0, 1 and 2:
7748c2ecf20Sopenharmony_ci *
7758c2ecf20Sopenharmony_ci * case 3:
7768c2ecf20Sopenharmony_ci *   data0    data1    data2     ppl        parity
7778c2ecf20Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
7788c2ecf20Sopenharmony_ci * | ------ | -data- | -data- | | pp |    | data1 ^ data2 ^ pp |
7798c2ecf20Sopenharmony_ci * | ------ | -data- | -data- | | pp | -> | data1 ^ data2 ^ pp |
7808c2ecf20Sopenharmony_ci * | -data- | -data- | -data- | | -- | -> | xor all data       |
7818c2ecf20Sopenharmony_ci * | -data- | -data- | ------ | | pp |    | data0 ^ data1 ^ pp |
7828c2ecf20Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
7838c2ecf20Sopenharmony_ci * pp_size = chunk_size
7848c2ecf20Sopenharmony_ci *
7858c2ecf20Sopenharmony_ci * case 4:
7868c2ecf20Sopenharmony_ci *   data0    data1    data2     ppl        parity
7878c2ecf20Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
7888c2ecf20Sopenharmony_ci * | ------ | -data- | ------ | | pp |    | data1 ^ pp         |
7898c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | | -- | -> | (no change)        |
7908c2ecf20Sopenharmony_ci * | ------ | ------ | ------ | | -- | -> | (no change)        |
7918c2ecf20Sopenharmony_ci * | -data- | ------ | ------ | | pp |    | data0 ^ pp         |
7928c2ecf20Sopenharmony_ci * +--------+--------+--------+ +----+    +--------------------+
7938c2ecf20Sopenharmony_ci * pp_size = chunk_size
7948c2ecf20Sopenharmony_ci */
7958c2ecf20Sopenharmony_cistatic int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
7968c2ecf20Sopenharmony_ci			     sector_t ppl_sector)
7978c2ecf20Sopenharmony_ci{
7988c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
7998c2ecf20Sopenharmony_ci	struct mddev *mddev = ppl_conf->mddev;
8008c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
8018c2ecf20Sopenharmony_ci	int block_size = ppl_conf->block_size;
8028c2ecf20Sopenharmony_ci	struct page *page1;
8038c2ecf20Sopenharmony_ci	struct page *page2;
8048c2ecf20Sopenharmony_ci	sector_t r_sector_first;
8058c2ecf20Sopenharmony_ci	sector_t r_sector_last;
8068c2ecf20Sopenharmony_ci	int strip_sectors;
8078c2ecf20Sopenharmony_ci	int data_disks;
8088c2ecf20Sopenharmony_ci	int i;
8098c2ecf20Sopenharmony_ci	int ret = 0;
8108c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
8118c2ecf20Sopenharmony_ci	unsigned int pp_size = le32_to_cpu(e->pp_size);
8128c2ecf20Sopenharmony_ci	unsigned int data_size = le32_to_cpu(e->data_size);
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci	page1 = alloc_page(GFP_KERNEL);
8158c2ecf20Sopenharmony_ci	page2 = alloc_page(GFP_KERNEL);
8168c2ecf20Sopenharmony_ci
8178c2ecf20Sopenharmony_ci	if (!page1 || !page2) {
8188c2ecf20Sopenharmony_ci		ret = -ENOMEM;
8198c2ecf20Sopenharmony_ci		goto out;
8208c2ecf20Sopenharmony_ci	}
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci	r_sector_first = le64_to_cpu(e->data_sector) * (block_size >> 9);
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	if ((pp_size >> 9) < conf->chunk_sectors) {
8258c2ecf20Sopenharmony_ci		if (pp_size > 0) {
8268c2ecf20Sopenharmony_ci			data_disks = data_size / pp_size;
8278c2ecf20Sopenharmony_ci			strip_sectors = pp_size >> 9;
8288c2ecf20Sopenharmony_ci		} else {
8298c2ecf20Sopenharmony_ci			data_disks = conf->raid_disks - conf->max_degraded;
8308c2ecf20Sopenharmony_ci			strip_sectors = (data_size >> 9) / data_disks;
8318c2ecf20Sopenharmony_ci		}
8328c2ecf20Sopenharmony_ci		r_sector_last = r_sector_first +
8338c2ecf20Sopenharmony_ci				(data_disks - 1) * conf->chunk_sectors +
8348c2ecf20Sopenharmony_ci				strip_sectors;
8358c2ecf20Sopenharmony_ci	} else {
8368c2ecf20Sopenharmony_ci		data_disks = conf->raid_disks - conf->max_degraded;
8378c2ecf20Sopenharmony_ci		strip_sectors = conf->chunk_sectors;
8388c2ecf20Sopenharmony_ci		r_sector_last = r_sector_first + (data_size >> 9);
8398c2ecf20Sopenharmony_ci	}
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	pr_debug("%s: array sector first: %llu last: %llu\n", __func__,
8428c2ecf20Sopenharmony_ci		 (unsigned long long)r_sector_first,
8438c2ecf20Sopenharmony_ci		 (unsigned long long)r_sector_last);
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	/* if start and end is 4k aligned, use a 4k block */
8468c2ecf20Sopenharmony_ci	if (block_size == 512 &&
8478c2ecf20Sopenharmony_ci	    (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
8488c2ecf20Sopenharmony_ci	    (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
8498c2ecf20Sopenharmony_ci		block_size = RAID5_STRIPE_SIZE(conf);
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	/* iterate through blocks in strip */
8528c2ecf20Sopenharmony_ci	for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
8538c2ecf20Sopenharmony_ci		bool update_parity = false;
8548c2ecf20Sopenharmony_ci		sector_t parity_sector;
8558c2ecf20Sopenharmony_ci		struct md_rdev *parity_rdev;
8568c2ecf20Sopenharmony_ci		struct stripe_head sh;
8578c2ecf20Sopenharmony_ci		int disk;
8588c2ecf20Sopenharmony_ci		int indent = 0;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci		pr_debug("%s:%*s iter %d start\n", __func__, indent, "", i);
8618c2ecf20Sopenharmony_ci		indent += 2;
8628c2ecf20Sopenharmony_ci
8638c2ecf20Sopenharmony_ci		memset(page_address(page1), 0, PAGE_SIZE);
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci		/* iterate through data member disks */
8668c2ecf20Sopenharmony_ci		for (disk = 0; disk < data_disks; disk++) {
8678c2ecf20Sopenharmony_ci			int dd_idx;
8688c2ecf20Sopenharmony_ci			struct md_rdev *rdev;
8698c2ecf20Sopenharmony_ci			sector_t sector;
8708c2ecf20Sopenharmony_ci			sector_t r_sector = r_sector_first + i +
8718c2ecf20Sopenharmony_ci					    (disk * conf->chunk_sectors);
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci			pr_debug("%s:%*s data member disk %d start\n",
8748c2ecf20Sopenharmony_ci				 __func__, indent, "", disk);
8758c2ecf20Sopenharmony_ci			indent += 2;
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci			if (r_sector >= r_sector_last) {
8788c2ecf20Sopenharmony_ci				pr_debug("%s:%*s array sector %llu doesn't need parity update\n",
8798c2ecf20Sopenharmony_ci					 __func__, indent, "",
8808c2ecf20Sopenharmony_ci					 (unsigned long long)r_sector);
8818c2ecf20Sopenharmony_ci				indent -= 2;
8828c2ecf20Sopenharmony_ci				continue;
8838c2ecf20Sopenharmony_ci			}
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci			update_parity = true;
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci			/* map raid sector to member disk */
8888c2ecf20Sopenharmony_ci			sector = raid5_compute_sector(conf, r_sector, 0,
8898c2ecf20Sopenharmony_ci						      &dd_idx, NULL);
8908c2ecf20Sopenharmony_ci			pr_debug("%s:%*s processing array sector %llu => data member disk %d, sector %llu\n",
8918c2ecf20Sopenharmony_ci				 __func__, indent, "",
8928c2ecf20Sopenharmony_ci				 (unsigned long long)r_sector, dd_idx,
8938c2ecf20Sopenharmony_ci				 (unsigned long long)sector);
8948c2ecf20Sopenharmony_ci
8958c2ecf20Sopenharmony_ci			rdev = conf->disks[dd_idx].rdev;
8968c2ecf20Sopenharmony_ci			if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
8978c2ecf20Sopenharmony_ci				      sector >= rdev->recovery_offset)) {
8988c2ecf20Sopenharmony_ci				pr_debug("%s:%*s data member disk %d missing\n",
8998c2ecf20Sopenharmony_ci					 __func__, indent, "", dd_idx);
9008c2ecf20Sopenharmony_ci				update_parity = false;
9018c2ecf20Sopenharmony_ci				break;
9028c2ecf20Sopenharmony_ci			}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci			pr_debug("%s:%*s reading data member disk %s sector %llu\n",
9058c2ecf20Sopenharmony_ci				 __func__, indent, "", bdevname(rdev->bdev, b),
9068c2ecf20Sopenharmony_ci				 (unsigned long long)sector);
9078c2ecf20Sopenharmony_ci			if (!sync_page_io(rdev, sector, block_size, page2,
9088c2ecf20Sopenharmony_ci					REQ_OP_READ, 0, false)) {
9098c2ecf20Sopenharmony_ci				md_error(mddev, rdev);
9108c2ecf20Sopenharmony_ci				pr_debug("%s:%*s read failed!\n", __func__,
9118c2ecf20Sopenharmony_ci					 indent, "");
9128c2ecf20Sopenharmony_ci				ret = -EIO;
9138c2ecf20Sopenharmony_ci				goto out;
9148c2ecf20Sopenharmony_ci			}
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci			ppl_xor(block_size, page1, page2);
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci			indent -= 2;
9198c2ecf20Sopenharmony_ci		}
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci		if (!update_parity)
9228c2ecf20Sopenharmony_ci			continue;
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci		if (pp_size > 0) {
9258c2ecf20Sopenharmony_ci			pr_debug("%s:%*s reading pp disk sector %llu\n",
9268c2ecf20Sopenharmony_ci				 __func__, indent, "",
9278c2ecf20Sopenharmony_ci				 (unsigned long long)(ppl_sector + i));
9288c2ecf20Sopenharmony_ci			if (!sync_page_io(log->rdev,
9298c2ecf20Sopenharmony_ci					ppl_sector - log->rdev->data_offset + i,
9308c2ecf20Sopenharmony_ci					block_size, page2, REQ_OP_READ, 0,
9318c2ecf20Sopenharmony_ci					false)) {
9328c2ecf20Sopenharmony_ci				pr_debug("%s:%*s read failed!\n", __func__,
9338c2ecf20Sopenharmony_ci					 indent, "");
9348c2ecf20Sopenharmony_ci				md_error(mddev, log->rdev);
9358c2ecf20Sopenharmony_ci				ret = -EIO;
9368c2ecf20Sopenharmony_ci				goto out;
9378c2ecf20Sopenharmony_ci			}
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci			ppl_xor(block_size, page1, page2);
9408c2ecf20Sopenharmony_ci		}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci		/* map raid sector to parity disk */
9438c2ecf20Sopenharmony_ci		parity_sector = raid5_compute_sector(conf, r_sector_first + i,
9448c2ecf20Sopenharmony_ci				0, &disk, &sh);
9458c2ecf20Sopenharmony_ci		BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
9468c2ecf20Sopenharmony_ci		parity_rdev = conf->disks[sh.pd_idx].rdev;
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci		BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
9498c2ecf20Sopenharmony_ci		pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
9508c2ecf20Sopenharmony_ci			 __func__, indent, "",
9518c2ecf20Sopenharmony_ci			 (unsigned long long)parity_sector,
9528c2ecf20Sopenharmony_ci			 bdevname(parity_rdev->bdev, b));
9538c2ecf20Sopenharmony_ci		if (!sync_page_io(parity_rdev, parity_sector, block_size,
9548c2ecf20Sopenharmony_ci				page1, REQ_OP_WRITE, 0, false)) {
9558c2ecf20Sopenharmony_ci			pr_debug("%s:%*s parity write error!\n", __func__,
9568c2ecf20Sopenharmony_ci				 indent, "");
9578c2ecf20Sopenharmony_ci			md_error(mddev, parity_rdev);
9588c2ecf20Sopenharmony_ci			ret = -EIO;
9598c2ecf20Sopenharmony_ci			goto out;
9608c2ecf20Sopenharmony_ci		}
9618c2ecf20Sopenharmony_ci	}
9628c2ecf20Sopenharmony_ciout:
9638c2ecf20Sopenharmony_ci	if (page1)
9648c2ecf20Sopenharmony_ci		__free_page(page1);
9658c2ecf20Sopenharmony_ci	if (page2)
9668c2ecf20Sopenharmony_ci		__free_page(page2);
9678c2ecf20Sopenharmony_ci	return ret;
9688c2ecf20Sopenharmony_ci}
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_cistatic int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
9718c2ecf20Sopenharmony_ci		       sector_t offset)
9728c2ecf20Sopenharmony_ci{
9738c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
9748c2ecf20Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
9758c2ecf20Sopenharmony_ci	struct mddev *mddev = rdev->mddev;
9768c2ecf20Sopenharmony_ci	sector_t ppl_sector = rdev->ppl.sector + offset +
9778c2ecf20Sopenharmony_ci			      (PPL_HEADER_SIZE >> 9);
9788c2ecf20Sopenharmony_ci	struct page *page;
9798c2ecf20Sopenharmony_ci	int i;
9808c2ecf20Sopenharmony_ci	int ret = 0;
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
9838c2ecf20Sopenharmony_ci	if (!page)
9848c2ecf20Sopenharmony_ci		return -ENOMEM;
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	/* iterate through all PPL entries saved */
9878c2ecf20Sopenharmony_ci	for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) {
9888c2ecf20Sopenharmony_ci		struct ppl_header_entry *e = &pplhdr->entries[i];
9898c2ecf20Sopenharmony_ci		u32 pp_size = le32_to_cpu(e->pp_size);
9908c2ecf20Sopenharmony_ci		sector_t sector = ppl_sector;
9918c2ecf20Sopenharmony_ci		int ppl_entry_sectors = pp_size >> 9;
9928c2ecf20Sopenharmony_ci		u32 crc, crc_stored;
9938c2ecf20Sopenharmony_ci
9948c2ecf20Sopenharmony_ci		pr_debug("%s: disk: %d entry: %d ppl_sector: %llu pp_size: %u\n",
9958c2ecf20Sopenharmony_ci			 __func__, rdev->raid_disk, i,
9968c2ecf20Sopenharmony_ci			 (unsigned long long)ppl_sector, pp_size);
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci		crc = ~0;
9998c2ecf20Sopenharmony_ci		crc_stored = le32_to_cpu(e->checksum);
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci		/* read parial parity for this entry and calculate its checksum */
10028c2ecf20Sopenharmony_ci		while (pp_size) {
10038c2ecf20Sopenharmony_ci			int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci			if (!sync_page_io(rdev, sector - rdev->data_offset,
10068c2ecf20Sopenharmony_ci					s, page, REQ_OP_READ, 0, false)) {
10078c2ecf20Sopenharmony_ci				md_error(mddev, rdev);
10088c2ecf20Sopenharmony_ci				ret = -EIO;
10098c2ecf20Sopenharmony_ci				goto out;
10108c2ecf20Sopenharmony_ci			}
10118c2ecf20Sopenharmony_ci
10128c2ecf20Sopenharmony_ci			crc = crc32c_le(crc, page_address(page), s);
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci			pp_size -= s;
10158c2ecf20Sopenharmony_ci			sector += s >> 9;
10168c2ecf20Sopenharmony_ci		}
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci		crc = ~crc;
10198c2ecf20Sopenharmony_ci
10208c2ecf20Sopenharmony_ci		if (crc != crc_stored) {
10218c2ecf20Sopenharmony_ci			/*
10228c2ecf20Sopenharmony_ci			 * Don't recover this entry if the checksum does not
10238c2ecf20Sopenharmony_ci			 * match, but keep going and try to recover other
10248c2ecf20Sopenharmony_ci			 * entries.
10258c2ecf20Sopenharmony_ci			 */
10268c2ecf20Sopenharmony_ci			pr_debug("%s: ppl entry crc does not match: stored: 0x%x calculated: 0x%x\n",
10278c2ecf20Sopenharmony_ci				 __func__, crc_stored, crc);
10288c2ecf20Sopenharmony_ci			ppl_conf->mismatch_count++;
10298c2ecf20Sopenharmony_ci		} else {
10308c2ecf20Sopenharmony_ci			ret = ppl_recover_entry(log, e, ppl_sector);
10318c2ecf20Sopenharmony_ci			if (ret)
10328c2ecf20Sopenharmony_ci				goto out;
10338c2ecf20Sopenharmony_ci			ppl_conf->recovered_entries++;
10348c2ecf20Sopenharmony_ci		}
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_ci		ppl_sector += ppl_entry_sectors;
10378c2ecf20Sopenharmony_ci	}
10388c2ecf20Sopenharmony_ci
10398c2ecf20Sopenharmony_ci	/* flush the disk cache after recovery if necessary */
10408c2ecf20Sopenharmony_ci	ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
10418c2ecf20Sopenharmony_ciout:
10428c2ecf20Sopenharmony_ci	__free_page(page);
10438c2ecf20Sopenharmony_ci	return ret;
10448c2ecf20Sopenharmony_ci}
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_cistatic int ppl_write_empty_header(struct ppl_log *log)
10478c2ecf20Sopenharmony_ci{
10488c2ecf20Sopenharmony_ci	struct page *page;
10498c2ecf20Sopenharmony_ci	struct ppl_header *pplhdr;
10508c2ecf20Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
10518c2ecf20Sopenharmony_ci	int ret = 0;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	pr_debug("%s: disk: %d ppl_sector: %llu\n", __func__,
10548c2ecf20Sopenharmony_ci		 rdev->raid_disk, (unsigned long long)rdev->ppl.sector);
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ci	page = alloc_page(GFP_NOIO | __GFP_ZERO);
10578c2ecf20Sopenharmony_ci	if (!page)
10588c2ecf20Sopenharmony_ci		return -ENOMEM;
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	pplhdr = page_address(page);
10618c2ecf20Sopenharmony_ci	/* zero out PPL space to avoid collision with old PPLs */
10628c2ecf20Sopenharmony_ci	blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
10638c2ecf20Sopenharmony_ci			    log->rdev->ppl.size, GFP_NOIO, 0);
10648c2ecf20Sopenharmony_ci	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
10658c2ecf20Sopenharmony_ci	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
10668c2ecf20Sopenharmony_ci	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
10678c2ecf20Sopenharmony_ci
10688c2ecf20Sopenharmony_ci	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
10698c2ecf20Sopenharmony_ci			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
10708c2ecf20Sopenharmony_ci			  REQ_FUA, 0, false)) {
10718c2ecf20Sopenharmony_ci		md_error(rdev->mddev, rdev);
10728c2ecf20Sopenharmony_ci		ret = -EIO;
10738c2ecf20Sopenharmony_ci	}
10748c2ecf20Sopenharmony_ci
10758c2ecf20Sopenharmony_ci	__free_page(page);
10768c2ecf20Sopenharmony_ci	return ret;
10778c2ecf20Sopenharmony_ci}
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_cistatic int ppl_load_distributed(struct ppl_log *log)
10808c2ecf20Sopenharmony_ci{
10818c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = log->ppl_conf;
10828c2ecf20Sopenharmony_ci	struct md_rdev *rdev = log->rdev;
10838c2ecf20Sopenharmony_ci	struct mddev *mddev = rdev->mddev;
10848c2ecf20Sopenharmony_ci	struct page *page, *page2, *tmp;
10858c2ecf20Sopenharmony_ci	struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
10868c2ecf20Sopenharmony_ci	u32 crc, crc_stored;
10878c2ecf20Sopenharmony_ci	u32 signature;
10888c2ecf20Sopenharmony_ci	int ret = 0, i;
10898c2ecf20Sopenharmony_ci	sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
10928c2ecf20Sopenharmony_ci	/* read PPL headers, find the recent one */
10938c2ecf20Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
10948c2ecf20Sopenharmony_ci	if (!page)
10958c2ecf20Sopenharmony_ci		return -ENOMEM;
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	page2 = alloc_page(GFP_KERNEL);
10988c2ecf20Sopenharmony_ci	if (!page2) {
10998c2ecf20Sopenharmony_ci		__free_page(page);
11008c2ecf20Sopenharmony_ci		return -ENOMEM;
11018c2ecf20Sopenharmony_ci	}
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	/* searching ppl area for latest ppl */
11048c2ecf20Sopenharmony_ci	while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
11058c2ecf20Sopenharmony_ci		if (!sync_page_io(rdev,
11068c2ecf20Sopenharmony_ci				  rdev->ppl.sector - rdev->data_offset +
11078c2ecf20Sopenharmony_ci				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
11088c2ecf20Sopenharmony_ci				  0, false)) {
11098c2ecf20Sopenharmony_ci			md_error(mddev, rdev);
11108c2ecf20Sopenharmony_ci			ret = -EIO;
11118c2ecf20Sopenharmony_ci			/* if not able to read - don't recover any PPL */
11128c2ecf20Sopenharmony_ci			pplhdr = NULL;
11138c2ecf20Sopenharmony_ci			break;
11148c2ecf20Sopenharmony_ci		}
11158c2ecf20Sopenharmony_ci		pplhdr = page_address(page);
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci		/* check header validity */
11188c2ecf20Sopenharmony_ci		crc_stored = le32_to_cpu(pplhdr->checksum);
11198c2ecf20Sopenharmony_ci		pplhdr->checksum = 0;
11208c2ecf20Sopenharmony_ci		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
11218c2ecf20Sopenharmony_ci
11228c2ecf20Sopenharmony_ci		if (crc_stored != crc) {
11238c2ecf20Sopenharmony_ci			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
11248c2ecf20Sopenharmony_ci				 __func__, crc_stored, crc,
11258c2ecf20Sopenharmony_ci				 (unsigned long long)pplhdr_offset);
11268c2ecf20Sopenharmony_ci			pplhdr = prev_pplhdr;
11278c2ecf20Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
11288c2ecf20Sopenharmony_ci			break;
11298c2ecf20Sopenharmony_ci		}
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci		signature = le32_to_cpu(pplhdr->signature);
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci		if (mddev->external) {
11348c2ecf20Sopenharmony_ci			/*
11358c2ecf20Sopenharmony_ci			 * For external metadata the header signature is set and
11368c2ecf20Sopenharmony_ci			 * validated in userspace.
11378c2ecf20Sopenharmony_ci			 */
11388c2ecf20Sopenharmony_ci			ppl_conf->signature = signature;
11398c2ecf20Sopenharmony_ci		} else if (ppl_conf->signature != signature) {
11408c2ecf20Sopenharmony_ci			pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
11418c2ecf20Sopenharmony_ci				 __func__, signature, ppl_conf->signature,
11428c2ecf20Sopenharmony_ci				 (unsigned long long)pplhdr_offset);
11438c2ecf20Sopenharmony_ci			pplhdr = prev_pplhdr;
11448c2ecf20Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
11458c2ecf20Sopenharmony_ci			break;
11468c2ecf20Sopenharmony_ci		}
11478c2ecf20Sopenharmony_ci
11488c2ecf20Sopenharmony_ci		if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
11498c2ecf20Sopenharmony_ci		    le64_to_cpu(pplhdr->generation)) {
11508c2ecf20Sopenharmony_ci			/* previous was newest */
11518c2ecf20Sopenharmony_ci			pplhdr = prev_pplhdr;
11528c2ecf20Sopenharmony_ci			pplhdr_offset = prev_pplhdr_offset;
11538c2ecf20Sopenharmony_ci			break;
11548c2ecf20Sopenharmony_ci		}
11558c2ecf20Sopenharmony_ci
11568c2ecf20Sopenharmony_ci		prev_pplhdr_offset = pplhdr_offset;
11578c2ecf20Sopenharmony_ci		prev_pplhdr = pplhdr;
11588c2ecf20Sopenharmony_ci
11598c2ecf20Sopenharmony_ci		tmp = page;
11608c2ecf20Sopenharmony_ci		page = page2;
11618c2ecf20Sopenharmony_ci		page2 = tmp;
11628c2ecf20Sopenharmony_ci
11638c2ecf20Sopenharmony_ci		/* calculate next potential ppl offset */
11648c2ecf20Sopenharmony_ci		for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
11658c2ecf20Sopenharmony_ci			pplhdr_offset +=
11668c2ecf20Sopenharmony_ci			    le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
11678c2ecf20Sopenharmony_ci		pplhdr_offset += PPL_HEADER_SIZE >> 9;
11688c2ecf20Sopenharmony_ci	}
11698c2ecf20Sopenharmony_ci
11708c2ecf20Sopenharmony_ci	/* no valid ppl found */
11718c2ecf20Sopenharmony_ci	if (!pplhdr)
11728c2ecf20Sopenharmony_ci		ppl_conf->mismatch_count++;
11738c2ecf20Sopenharmony_ci	else
11748c2ecf20Sopenharmony_ci		pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
11758c2ecf20Sopenharmony_ci		    __func__, (unsigned long long)pplhdr_offset,
11768c2ecf20Sopenharmony_ci		    le64_to_cpu(pplhdr->generation));
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_ci	/* attempt to recover from log if we are starting a dirty array */
11798c2ecf20Sopenharmony_ci	if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
11808c2ecf20Sopenharmony_ci		ret = ppl_recover(log, pplhdr, pplhdr_offset);
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_ci	/* write empty header if we are starting the array */
11838c2ecf20Sopenharmony_ci	if (!ret && !mddev->pers)
11848c2ecf20Sopenharmony_ci		ret = ppl_write_empty_header(log);
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_ci	__free_page(page);
11878c2ecf20Sopenharmony_ci	__free_page(page2);
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_ci	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
11908c2ecf20Sopenharmony_ci		 __func__, ret, ppl_conf->mismatch_count,
11918c2ecf20Sopenharmony_ci		 ppl_conf->recovered_entries);
11928c2ecf20Sopenharmony_ci	return ret;
11938c2ecf20Sopenharmony_ci}
11948c2ecf20Sopenharmony_ci
11958c2ecf20Sopenharmony_cistatic int ppl_load(struct ppl_conf *ppl_conf)
11968c2ecf20Sopenharmony_ci{
11978c2ecf20Sopenharmony_ci	int ret = 0;
11988c2ecf20Sopenharmony_ci	u32 signature = 0;
11998c2ecf20Sopenharmony_ci	bool signature_set = false;
12008c2ecf20Sopenharmony_ci	int i;
12018c2ecf20Sopenharmony_ci
12028c2ecf20Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
12038c2ecf20Sopenharmony_ci		struct ppl_log *log = &ppl_conf->child_logs[i];
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci		/* skip missing drive */
12068c2ecf20Sopenharmony_ci		if (!log->rdev)
12078c2ecf20Sopenharmony_ci			continue;
12088c2ecf20Sopenharmony_ci
12098c2ecf20Sopenharmony_ci		ret = ppl_load_distributed(log);
12108c2ecf20Sopenharmony_ci		if (ret)
12118c2ecf20Sopenharmony_ci			break;
12128c2ecf20Sopenharmony_ci
12138c2ecf20Sopenharmony_ci		/*
12148c2ecf20Sopenharmony_ci		 * For external metadata we can't check if the signature is
12158c2ecf20Sopenharmony_ci		 * correct on a single drive, but we can check if it is the same
12168c2ecf20Sopenharmony_ci		 * on all drives.
12178c2ecf20Sopenharmony_ci		 */
12188c2ecf20Sopenharmony_ci		if (ppl_conf->mddev->external) {
12198c2ecf20Sopenharmony_ci			if (!signature_set) {
12208c2ecf20Sopenharmony_ci				signature = ppl_conf->signature;
12218c2ecf20Sopenharmony_ci				signature_set = true;
12228c2ecf20Sopenharmony_ci			} else if (signature != ppl_conf->signature) {
12238c2ecf20Sopenharmony_ci				pr_warn("md/raid:%s: PPL header signature does not match on all member drives\n",
12248c2ecf20Sopenharmony_ci					mdname(ppl_conf->mddev));
12258c2ecf20Sopenharmony_ci				ret = -EINVAL;
12268c2ecf20Sopenharmony_ci				break;
12278c2ecf20Sopenharmony_ci			}
12288c2ecf20Sopenharmony_ci		}
12298c2ecf20Sopenharmony_ci	}
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_ci	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
12328c2ecf20Sopenharmony_ci		 __func__, ret, ppl_conf->mismatch_count,
12338c2ecf20Sopenharmony_ci		 ppl_conf->recovered_entries);
12348c2ecf20Sopenharmony_ci	return ret;
12358c2ecf20Sopenharmony_ci}
12368c2ecf20Sopenharmony_ci
12378c2ecf20Sopenharmony_cistatic void __ppl_exit_log(struct ppl_conf *ppl_conf)
12388c2ecf20Sopenharmony_ci{
12398c2ecf20Sopenharmony_ci	clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
12408c2ecf20Sopenharmony_ci	clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci	kfree(ppl_conf->child_logs);
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci	bioset_exit(&ppl_conf->bs);
12458c2ecf20Sopenharmony_ci	bioset_exit(&ppl_conf->flush_bs);
12468c2ecf20Sopenharmony_ci	mempool_exit(&ppl_conf->io_pool);
12478c2ecf20Sopenharmony_ci	kmem_cache_destroy(ppl_conf->io_kc);
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci	kfree(ppl_conf);
12508c2ecf20Sopenharmony_ci}
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_civoid ppl_exit_log(struct r5conf *conf)
12538c2ecf20Sopenharmony_ci{
12548c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci	if (ppl_conf) {
12578c2ecf20Sopenharmony_ci		__ppl_exit_log(ppl_conf);
12588c2ecf20Sopenharmony_ci		conf->log_private = NULL;
12598c2ecf20Sopenharmony_ci	}
12608c2ecf20Sopenharmony_ci}
12618c2ecf20Sopenharmony_ci
12628c2ecf20Sopenharmony_cistatic int ppl_validate_rdev(struct md_rdev *rdev)
12638c2ecf20Sopenharmony_ci{
12648c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
12658c2ecf20Sopenharmony_ci	int ppl_data_sectors;
12668c2ecf20Sopenharmony_ci	int ppl_size_new;
12678c2ecf20Sopenharmony_ci
12688c2ecf20Sopenharmony_ci	/*
12698c2ecf20Sopenharmony_ci	 * The configured PPL size must be enough to store
12708c2ecf20Sopenharmony_ci	 * the header and (at the very least) partial parity
12718c2ecf20Sopenharmony_ci	 * for one stripe. Round it down to ensure the data
12728c2ecf20Sopenharmony_ci	 * space is cleanly divisible by stripe size.
12738c2ecf20Sopenharmony_ci	 */
12748c2ecf20Sopenharmony_ci	ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9);
12758c2ecf20Sopenharmony_ci
12768c2ecf20Sopenharmony_ci	if (ppl_data_sectors > 0)
12778c2ecf20Sopenharmony_ci		ppl_data_sectors = rounddown(ppl_data_sectors,
12788c2ecf20Sopenharmony_ci				RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private));
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci	if (ppl_data_sectors <= 0) {
12818c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: PPL space too small on %s\n",
12828c2ecf20Sopenharmony_ci			mdname(rdev->mddev), bdevname(rdev->bdev, b));
12838c2ecf20Sopenharmony_ci		return -ENOSPC;
12848c2ecf20Sopenharmony_ci	}
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_ci	ppl_size_new = ppl_data_sectors + (PPL_HEADER_SIZE >> 9);
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_ci	if ((rdev->ppl.sector < rdev->data_offset &&
12898c2ecf20Sopenharmony_ci	     rdev->ppl.sector + ppl_size_new > rdev->data_offset) ||
12908c2ecf20Sopenharmony_ci	    (rdev->ppl.sector >= rdev->data_offset &&
12918c2ecf20Sopenharmony_ci	     rdev->data_offset + rdev->sectors > rdev->ppl.sector)) {
12928c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: PPL space overlaps with data on %s\n",
12938c2ecf20Sopenharmony_ci			mdname(rdev->mddev), bdevname(rdev->bdev, b));
12948c2ecf20Sopenharmony_ci		return -EINVAL;
12958c2ecf20Sopenharmony_ci	}
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	if (!rdev->mddev->external &&
12988c2ecf20Sopenharmony_ci	    ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) ||
12998c2ecf20Sopenharmony_ci	     (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) {
13008c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: PPL space overlaps with superblock on %s\n",
13018c2ecf20Sopenharmony_ci			mdname(rdev->mddev), bdevname(rdev->bdev, b));
13028c2ecf20Sopenharmony_ci		return -EINVAL;
13038c2ecf20Sopenharmony_ci	}
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci	rdev->ppl.size = ppl_size_new;
13068c2ecf20Sopenharmony_ci
13078c2ecf20Sopenharmony_ci	return 0;
13088c2ecf20Sopenharmony_ci}
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_cistatic void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
13118c2ecf20Sopenharmony_ci{
13128c2ecf20Sopenharmony_ci	struct request_queue *q;
13138c2ecf20Sopenharmony_ci
13148c2ecf20Sopenharmony_ci	if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
13158c2ecf20Sopenharmony_ci				      PPL_HEADER_SIZE) * 2) {
13168c2ecf20Sopenharmony_ci		log->use_multippl = true;
13178c2ecf20Sopenharmony_ci		set_bit(MD_HAS_MULTIPLE_PPLS,
13188c2ecf20Sopenharmony_ci			&log->ppl_conf->mddev->flags);
13198c2ecf20Sopenharmony_ci		log->entry_space = PPL_SPACE_SIZE;
13208c2ecf20Sopenharmony_ci	} else {
13218c2ecf20Sopenharmony_ci		log->use_multippl = false;
13228c2ecf20Sopenharmony_ci		log->entry_space = (log->rdev->ppl.size << 9) -
13238c2ecf20Sopenharmony_ci				   PPL_HEADER_SIZE;
13248c2ecf20Sopenharmony_ci	}
13258c2ecf20Sopenharmony_ci	log->next_io_sector = rdev->ppl.sector;
13268c2ecf20Sopenharmony_ci
13278c2ecf20Sopenharmony_ci	q = bdev_get_queue(rdev->bdev);
13288c2ecf20Sopenharmony_ci	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
13298c2ecf20Sopenharmony_ci		log->wb_cache_on = true;
13308c2ecf20Sopenharmony_ci}
13318c2ecf20Sopenharmony_ci
13328c2ecf20Sopenharmony_ciint ppl_init_log(struct r5conf *conf)
13338c2ecf20Sopenharmony_ci{
13348c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf;
13358c2ecf20Sopenharmony_ci	struct mddev *mddev = conf->mddev;
13368c2ecf20Sopenharmony_ci	int ret = 0;
13378c2ecf20Sopenharmony_ci	int max_disks;
13388c2ecf20Sopenharmony_ci	int i;
13398c2ecf20Sopenharmony_ci
13408c2ecf20Sopenharmony_ci	pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n",
13418c2ecf20Sopenharmony_ci		 mdname(conf->mddev));
13428c2ecf20Sopenharmony_ci
13438c2ecf20Sopenharmony_ci	if (PAGE_SIZE != 4096)
13448c2ecf20Sopenharmony_ci		return -EINVAL;
13458c2ecf20Sopenharmony_ci
13468c2ecf20Sopenharmony_ci	if (mddev->level != 5) {
13478c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with raid level %d\n",
13488c2ecf20Sopenharmony_ci			mdname(mddev), mddev->level);
13498c2ecf20Sopenharmony_ci		return -EINVAL;
13508c2ecf20Sopenharmony_ci	}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	if (mddev->bitmap_info.file || mddev->bitmap_info.offset) {
13538c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with bitmap\n",
13548c2ecf20Sopenharmony_ci			mdname(mddev));
13558c2ecf20Sopenharmony_ci		return -EINVAL;
13568c2ecf20Sopenharmony_ci	}
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
13598c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s PPL is not compatible with journal\n",
13608c2ecf20Sopenharmony_ci			mdname(mddev));
13618c2ecf20Sopenharmony_ci		return -EINVAL;
13628c2ecf20Sopenharmony_ci	}
13638c2ecf20Sopenharmony_ci
13648c2ecf20Sopenharmony_ci	max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) *
13658c2ecf20Sopenharmony_ci		BITS_PER_BYTE;
13668c2ecf20Sopenharmony_ci	if (conf->raid_disks > max_disks) {
13678c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
13688c2ecf20Sopenharmony_ci			mdname(mddev), max_disks);
13698c2ecf20Sopenharmony_ci		return -EINVAL;
13708c2ecf20Sopenharmony_ci	}
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci	ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL);
13738c2ecf20Sopenharmony_ci	if (!ppl_conf)
13748c2ecf20Sopenharmony_ci		return -ENOMEM;
13758c2ecf20Sopenharmony_ci
13768c2ecf20Sopenharmony_ci	ppl_conf->mddev = mddev;
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_ci	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
13798c2ecf20Sopenharmony_ci	if (!ppl_conf->io_kc) {
13808c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13818c2ecf20Sopenharmony_ci		goto err;
13828c2ecf20Sopenharmony_ci	}
13838c2ecf20Sopenharmony_ci
13848c2ecf20Sopenharmony_ci	ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc,
13858c2ecf20Sopenharmony_ci			   ppl_io_pool_free, ppl_conf->io_kc);
13868c2ecf20Sopenharmony_ci	if (ret)
13878c2ecf20Sopenharmony_ci		goto err;
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci	ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS);
13908c2ecf20Sopenharmony_ci	if (ret)
13918c2ecf20Sopenharmony_ci		goto err;
13928c2ecf20Sopenharmony_ci
13938c2ecf20Sopenharmony_ci	ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0);
13948c2ecf20Sopenharmony_ci	if (ret)
13958c2ecf20Sopenharmony_ci		goto err;
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_ci	ppl_conf->count = conf->raid_disks;
13988c2ecf20Sopenharmony_ci	ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
13998c2ecf20Sopenharmony_ci				       GFP_KERNEL);
14008c2ecf20Sopenharmony_ci	if (!ppl_conf->child_logs) {
14018c2ecf20Sopenharmony_ci		ret = -ENOMEM;
14028c2ecf20Sopenharmony_ci		goto err;
14038c2ecf20Sopenharmony_ci	}
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci	atomic64_set(&ppl_conf->seq, 0);
14068c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
14078c2ecf20Sopenharmony_ci	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
14088c2ecf20Sopenharmony_ci	ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	if (!mddev->external) {
14118c2ecf20Sopenharmony_ci		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
14128c2ecf20Sopenharmony_ci		ppl_conf->block_size = 512;
14138c2ecf20Sopenharmony_ci	} else {
14148c2ecf20Sopenharmony_ci		ppl_conf->block_size = queue_logical_block_size(mddev->queue);
14158c2ecf20Sopenharmony_ci	}
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	for (i = 0; i < ppl_conf->count; i++) {
14188c2ecf20Sopenharmony_ci		struct ppl_log *log = &ppl_conf->child_logs[i];
14198c2ecf20Sopenharmony_ci		struct md_rdev *rdev = conf->disks[i].rdev;
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_ci		mutex_init(&log->io_mutex);
14228c2ecf20Sopenharmony_ci		spin_lock_init(&log->io_list_lock);
14238c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&log->io_list);
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci		log->ppl_conf = ppl_conf;
14268c2ecf20Sopenharmony_ci		log->rdev = rdev;
14278c2ecf20Sopenharmony_ci
14288c2ecf20Sopenharmony_ci		if (rdev) {
14298c2ecf20Sopenharmony_ci			ret = ppl_validate_rdev(rdev);
14308c2ecf20Sopenharmony_ci			if (ret)
14318c2ecf20Sopenharmony_ci				goto err;
14328c2ecf20Sopenharmony_ci
14338c2ecf20Sopenharmony_ci			ppl_init_child_log(log, rdev);
14348c2ecf20Sopenharmony_ci		}
14358c2ecf20Sopenharmony_ci	}
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_ci	/* load and possibly recover the logs from the member disks */
14388c2ecf20Sopenharmony_ci	ret = ppl_load(ppl_conf);
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_ci	if (ret) {
14418c2ecf20Sopenharmony_ci		goto err;
14428c2ecf20Sopenharmony_ci	} else if (!mddev->pers && mddev->recovery_cp == 0 &&
14438c2ecf20Sopenharmony_ci		   ppl_conf->recovered_entries > 0 &&
14448c2ecf20Sopenharmony_ci		   ppl_conf->mismatch_count == 0) {
14458c2ecf20Sopenharmony_ci		/*
14468c2ecf20Sopenharmony_ci		 * If we are starting a dirty array and the recovery succeeds
14478c2ecf20Sopenharmony_ci		 * without any issues, set the array as clean.
14488c2ecf20Sopenharmony_ci		 */
14498c2ecf20Sopenharmony_ci		mddev->recovery_cp = MaxSector;
14508c2ecf20Sopenharmony_ci		set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
14518c2ecf20Sopenharmony_ci	} else if (mddev->pers && ppl_conf->mismatch_count > 0) {
14528c2ecf20Sopenharmony_ci		/* no mismatch allowed when enabling PPL for a running array */
14538c2ecf20Sopenharmony_ci		ret = -EINVAL;
14548c2ecf20Sopenharmony_ci		goto err;
14558c2ecf20Sopenharmony_ci	}
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci	conf->log_private = ppl_conf;
14588c2ecf20Sopenharmony_ci	set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
14598c2ecf20Sopenharmony_ci
14608c2ecf20Sopenharmony_ci	return 0;
14618c2ecf20Sopenharmony_cierr:
14628c2ecf20Sopenharmony_ci	__ppl_exit_log(ppl_conf);
14638c2ecf20Sopenharmony_ci	return ret;
14648c2ecf20Sopenharmony_ci}
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_ciint ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
14678c2ecf20Sopenharmony_ci{
14688c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = conf->log_private;
14698c2ecf20Sopenharmony_ci	struct ppl_log *log;
14708c2ecf20Sopenharmony_ci	int ret = 0;
14718c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci	if (!rdev)
14748c2ecf20Sopenharmony_ci		return -EINVAL;
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	pr_debug("%s: disk: %d operation: %s dev: %s\n",
14778c2ecf20Sopenharmony_ci		 __func__, rdev->raid_disk, add ? "add" : "remove",
14788c2ecf20Sopenharmony_ci		 bdevname(rdev->bdev, b));
14798c2ecf20Sopenharmony_ci
14808c2ecf20Sopenharmony_ci	if (rdev->raid_disk < 0)
14818c2ecf20Sopenharmony_ci		return 0;
14828c2ecf20Sopenharmony_ci
14838c2ecf20Sopenharmony_ci	if (rdev->raid_disk >= ppl_conf->count)
14848c2ecf20Sopenharmony_ci		return -ENODEV;
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci	log = &ppl_conf->child_logs[rdev->raid_disk];
14878c2ecf20Sopenharmony_ci
14888c2ecf20Sopenharmony_ci	mutex_lock(&log->io_mutex);
14898c2ecf20Sopenharmony_ci	if (add) {
14908c2ecf20Sopenharmony_ci		ret = ppl_validate_rdev(rdev);
14918c2ecf20Sopenharmony_ci		if (!ret) {
14928c2ecf20Sopenharmony_ci			log->rdev = rdev;
14938c2ecf20Sopenharmony_ci			ret = ppl_write_empty_header(log);
14948c2ecf20Sopenharmony_ci			ppl_init_child_log(log, rdev);
14958c2ecf20Sopenharmony_ci		}
14968c2ecf20Sopenharmony_ci	} else {
14978c2ecf20Sopenharmony_ci		log->rdev = NULL;
14988c2ecf20Sopenharmony_ci	}
14998c2ecf20Sopenharmony_ci	mutex_unlock(&log->io_mutex);
15008c2ecf20Sopenharmony_ci
15018c2ecf20Sopenharmony_ci	return ret;
15028c2ecf20Sopenharmony_ci}
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_cistatic ssize_t
15058c2ecf20Sopenharmony_cippl_write_hint_show(struct mddev *mddev, char *buf)
15068c2ecf20Sopenharmony_ci{
15078c2ecf20Sopenharmony_ci	size_t ret = 0;
15088c2ecf20Sopenharmony_ci	struct r5conf *conf;
15098c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf = NULL;
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
15128c2ecf20Sopenharmony_ci	conf = mddev->private;
15138c2ecf20Sopenharmony_ci	if (conf && raid5_has_ppl(conf))
15148c2ecf20Sopenharmony_ci		ppl_conf = conf->log_private;
15158c2ecf20Sopenharmony_ci	ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0);
15168c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
15178c2ecf20Sopenharmony_ci
15188c2ecf20Sopenharmony_ci	return ret;
15198c2ecf20Sopenharmony_ci}
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_cistatic ssize_t
15228c2ecf20Sopenharmony_cippl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
15238c2ecf20Sopenharmony_ci{
15248c2ecf20Sopenharmony_ci	struct r5conf *conf;
15258c2ecf20Sopenharmony_ci	struct ppl_conf *ppl_conf;
15268c2ecf20Sopenharmony_ci	int err = 0;
15278c2ecf20Sopenharmony_ci	unsigned short new;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci	if (len >= PAGE_SIZE)
15308c2ecf20Sopenharmony_ci		return -EINVAL;
15318c2ecf20Sopenharmony_ci	if (kstrtou16(page, 10, &new))
15328c2ecf20Sopenharmony_ci		return -EINVAL;
15338c2ecf20Sopenharmony_ci
15348c2ecf20Sopenharmony_ci	err = mddev_lock(mddev);
15358c2ecf20Sopenharmony_ci	if (err)
15368c2ecf20Sopenharmony_ci		return err;
15378c2ecf20Sopenharmony_ci
15388c2ecf20Sopenharmony_ci	conf = mddev->private;
15398c2ecf20Sopenharmony_ci	if (!conf) {
15408c2ecf20Sopenharmony_ci		err = -ENODEV;
15418c2ecf20Sopenharmony_ci	} else if (raid5_has_ppl(conf)) {
15428c2ecf20Sopenharmony_ci		ppl_conf = conf->log_private;
15438c2ecf20Sopenharmony_ci		if (!ppl_conf)
15448c2ecf20Sopenharmony_ci			err = -EINVAL;
15458c2ecf20Sopenharmony_ci		else
15468c2ecf20Sopenharmony_ci			ppl_conf->write_hint = new;
15478c2ecf20Sopenharmony_ci	} else {
15488c2ecf20Sopenharmony_ci		err = -EINVAL;
15498c2ecf20Sopenharmony_ci	}
15508c2ecf20Sopenharmony_ci
15518c2ecf20Sopenharmony_ci	mddev_unlock(mddev);
15528c2ecf20Sopenharmony_ci
15538c2ecf20Sopenharmony_ci	return err ?: len;
15548c2ecf20Sopenharmony_ci}
15558c2ecf20Sopenharmony_ci
15568c2ecf20Sopenharmony_cistruct md_sysfs_entry
15578c2ecf20Sopenharmony_cippl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
15588c2ecf20Sopenharmony_ci			ppl_write_hint_show,
15598c2ecf20Sopenharmony_ci			ppl_write_hint_store);
1560