162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Partial Parity Log for closing the RAID5 write hole 462306a36Sopenharmony_ci * Copyright (c) 2017, Intel Corporation. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/kernel.h> 862306a36Sopenharmony_ci#include <linux/blkdev.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <linux/crc32c.h> 1162306a36Sopenharmony_ci#include <linux/async_tx.h> 1262306a36Sopenharmony_ci#include <linux/raid/md_p.h> 1362306a36Sopenharmony_ci#include "md.h" 1462306a36Sopenharmony_ci#include "raid5.h" 1562306a36Sopenharmony_ci#include "raid5-log.h" 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci/* 1862306a36Sopenharmony_ci * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for 1962306a36Sopenharmony_ci * partial parity data. The header contains an array of entries 2062306a36Sopenharmony_ci * (struct ppl_header_entry) which describe the logged write requests. 2162306a36Sopenharmony_ci * Partial parity for the entries comes after the header, written in the same 2262306a36Sopenharmony_ci * sequence as the entries: 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * Header 2562306a36Sopenharmony_ci * entry0 2662306a36Sopenharmony_ci * ... 2762306a36Sopenharmony_ci * entryN 2862306a36Sopenharmony_ci * PP data 2962306a36Sopenharmony_ci * PP for entry0 3062306a36Sopenharmony_ci * ... 3162306a36Sopenharmony_ci * PP for entryN 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * An entry describes one or more consecutive stripe_heads, up to a full 3462306a36Sopenharmony_ci * stripe. The modifed raid data chunks form an m-by-n matrix, where m is the 3562306a36Sopenharmony_ci * number of stripe_heads in the entry and n is the number of modified data 3662306a36Sopenharmony_ci * disks. Every stripe_head in the entry must write to the same data disks. 
3762306a36Sopenharmony_ci * An example of a valid case described by a single entry (writes to the first 3862306a36Sopenharmony_ci * stripe of a 4 disk array, 16k chunk size): 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * sh->sector dd0 dd1 dd2 ppl 4162306a36Sopenharmony_ci * +-----+-----+-----+ 4262306a36Sopenharmony_ci * 0 | --- | --- | --- | +----+ 4362306a36Sopenharmony_ci * 8 | -W- | -W- | --- | | pp | data_sector = 8 4462306a36Sopenharmony_ci * 16 | -W- | -W- | --- | | pp | data_size = 3 * 2 * 4k 4562306a36Sopenharmony_ci * 24 | -W- | -W- | --- | | pp | pp_size = 3 * 4k 4662306a36Sopenharmony_ci * +-----+-----+-----+ +----+ 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * data_sector is the first raid sector of the modified data, data_size is the 4962306a36Sopenharmony_ci * total size of modified data and pp_size is the size of partial parity for 5062306a36Sopenharmony_ci * this entry. Entries for full stripe writes contain no partial parity 5162306a36Sopenharmony_ci * (pp_size = 0), they only mark the stripes for which parity should be 5262306a36Sopenharmony_ci * recalculated after an unclean shutdown. Every entry holds a checksum of its 5362306a36Sopenharmony_ci * partial parity, the header also has a checksum of the header itself. 5462306a36Sopenharmony_ci * 5562306a36Sopenharmony_ci * A write request is always logged to the PPL instance stored on the parity 5662306a36Sopenharmony_ci * disk of the corresponding stripe. For each member disk there is one ppl_log 5762306a36Sopenharmony_ci * used to handle logging for this disk, independently from others. They are 5862306a36Sopenharmony_ci * grouped in child_logs array in struct ppl_conf, which is assigned to 5962306a36Sopenharmony_ci * r5conf->log_private. 6062306a36Sopenharmony_ci * 6162306a36Sopenharmony_ci * ppl_io_unit represents a full PPL write, header_page contains the ppl_header. 6262306a36Sopenharmony_ci * PPL entries for logged stripes are added in ppl_log_stripe(). 
A stripe_head 6362306a36Sopenharmony_ci * can be appended to the last entry if it meets the conditions for a valid 6462306a36Sopenharmony_ci * entry described above, otherwise a new entry is added. Checksums of entries 6562306a36Sopenharmony_ci * are calculated incrementally as stripes containing partial parity are being 6662306a36Sopenharmony_ci * added. ppl_submit_iounit() calculates the checksum of the header and submits 6762306a36Sopenharmony_ci * a bio containing the header page and partial parity pages (sh->ppl_page) for 6862306a36Sopenharmony_ci * all stripes of the io_unit. When the PPL write completes, the stripes 6962306a36Sopenharmony_ci * associated with the io_unit are released and raid5d starts writing their data 7062306a36Sopenharmony_ci * and parity. When all stripes are written, the io_unit is freed and the next 7162306a36Sopenharmony_ci * can be submitted. 7262306a36Sopenharmony_ci * 7362306a36Sopenharmony_ci * An io_unit is used to gather stripes until it is submitted or becomes full 7462306a36Sopenharmony_ci * (if the maximum number of entries or size of PPL is reached). Another io_unit 7562306a36Sopenharmony_ci * can't be submitted until the previous has completed (PPL and stripe 7662306a36Sopenharmony_ci * data+parity is written). The log->io_list tracks all io_units of a log 7762306a36Sopenharmony_ci * (for a single member disk). New io_units are added to the end of the list 7862306a36Sopenharmony_ci * and the first io_unit is submitted, if it is not submitted already. 7962306a36Sopenharmony_ci * The current io_unit accepting new stripes is always at the end of the list. 8062306a36Sopenharmony_ci * 8162306a36Sopenharmony_ci * If write-back cache is enabled for any of the disks in the array, its data 8262306a36Sopenharmony_ci * must be flushed before next io_unit is submitted. 
8362306a36Sopenharmony_ci */ 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#define PPL_SPACE_SIZE (128 * 1024) 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistruct ppl_conf { 8862306a36Sopenharmony_ci struct mddev *mddev; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci /* array of child logs, one for each raid disk */ 9162306a36Sopenharmony_ci struct ppl_log *child_logs; 9262306a36Sopenharmony_ci int count; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci int block_size; /* the logical block size used for data_sector 9562306a36Sopenharmony_ci * in ppl_header_entry */ 9662306a36Sopenharmony_ci u32 signature; /* raid array identifier */ 9762306a36Sopenharmony_ci atomic64_t seq; /* current log write sequence number */ 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci struct kmem_cache *io_kc; 10062306a36Sopenharmony_ci mempool_t io_pool; 10162306a36Sopenharmony_ci struct bio_set bs; 10262306a36Sopenharmony_ci struct bio_set flush_bs; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci /* used only for recovery */ 10562306a36Sopenharmony_ci int recovered_entries; 10662306a36Sopenharmony_ci int mismatch_count; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci /* stripes to retry if failed to allocate io_unit */ 10962306a36Sopenharmony_ci struct list_head no_mem_stripes; 11062306a36Sopenharmony_ci spinlock_t no_mem_stripes_lock; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci unsigned short write_hint; 11362306a36Sopenharmony_ci}; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistruct ppl_log { 11662306a36Sopenharmony_ci struct ppl_conf *ppl_conf; /* shared between all log instances */ 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci struct md_rdev *rdev; /* array member disk associated with 11962306a36Sopenharmony_ci * this log instance */ 12062306a36Sopenharmony_ci struct mutex io_mutex; 12162306a36Sopenharmony_ci struct ppl_io_unit *current_io; /* current io_unit accepting new data 12262306a36Sopenharmony_ci * always at the end 
of io_list */ 12362306a36Sopenharmony_ci spinlock_t io_list_lock; 12462306a36Sopenharmony_ci struct list_head io_list; /* all io_units of this log */ 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci sector_t next_io_sector; 12762306a36Sopenharmony_ci unsigned int entry_space; 12862306a36Sopenharmony_ci bool use_multippl; 12962306a36Sopenharmony_ci bool wb_cache_on; 13062306a36Sopenharmony_ci unsigned long disk_flush_bitmap; 13162306a36Sopenharmony_ci}; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci#define PPL_IO_INLINE_BVECS 32 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistruct ppl_io_unit { 13662306a36Sopenharmony_ci struct ppl_log *log; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci struct page *header_page; /* for ppl_header */ 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci unsigned int entries_count; /* number of entries in ppl_header */ 14162306a36Sopenharmony_ci unsigned int pp_size; /* total size current of partial parity */ 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci u64 seq; /* sequence number of this log write */ 14462306a36Sopenharmony_ci struct list_head log_sibling; /* log->io_list */ 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci struct list_head stripe_list; /* stripes added to the io_unit */ 14762306a36Sopenharmony_ci atomic_t pending_stripes; /* how many stripes not written to raid */ 14862306a36Sopenharmony_ci atomic_t pending_flushes; /* how many disk flushes are in progress */ 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci bool submitted; /* true if write to log started */ 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci /* inline bio and its biovec for submitting the iounit */ 15362306a36Sopenharmony_ci struct bio bio; 15462306a36Sopenharmony_ci struct bio_vec biovec[PPL_IO_INLINE_BVECS]; 15562306a36Sopenharmony_ci}; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_cistruct dma_async_tx_descriptor * 15862306a36Sopenharmony_ciops_run_partial_parity(struct stripe_head *sh, struct 
raid5_percpu *percpu, 15962306a36Sopenharmony_ci struct dma_async_tx_descriptor *tx) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci int disks = sh->disks; 16262306a36Sopenharmony_ci struct page **srcs = percpu->scribble; 16362306a36Sopenharmony_ci int count = 0, pd_idx = sh->pd_idx, i; 16462306a36Sopenharmony_ci struct async_submit_ctl submit; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci /* 16962306a36Sopenharmony_ci * Partial parity is the XOR of stripe data chunks that are not changed 17062306a36Sopenharmony_ci * during the write request. Depending on available data 17162306a36Sopenharmony_ci * (read-modify-write vs. reconstruct-write case) we calculate it 17262306a36Sopenharmony_ci * differently. 17362306a36Sopenharmony_ci */ 17462306a36Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { 17562306a36Sopenharmony_ci /* 17662306a36Sopenharmony_ci * rmw: xor old data and parity from updated disks 17762306a36Sopenharmony_ci * This is calculated earlier by ops_run_prexor5() so just copy 17862306a36Sopenharmony_ci * the parity dev page. 
17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_ci srcs[count++] = sh->dev[pd_idx].page; 18162306a36Sopenharmony_ci } else if (sh->reconstruct_state == reconstruct_state_drain_run) { 18262306a36Sopenharmony_ci /* rcw: xor data from all not updated disks */ 18362306a36Sopenharmony_ci for (i = disks; i--;) { 18462306a36Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 18562306a36Sopenharmony_ci if (test_bit(R5_UPTODATE, &dev->flags)) 18662306a36Sopenharmony_ci srcs[count++] = dev->page; 18762306a36Sopenharmony_ci } 18862306a36Sopenharmony_ci } else { 18962306a36Sopenharmony_ci return tx; 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx, 19362306a36Sopenharmony_ci NULL, sh, (void *) (srcs + sh->disks + 2)); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if (count == 1) 19662306a36Sopenharmony_ci tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE, 19762306a36Sopenharmony_ci &submit); 19862306a36Sopenharmony_ci else 19962306a36Sopenharmony_ci tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE, 20062306a36Sopenharmony_ci &submit); 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci return tx; 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_cistatic void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci struct kmem_cache *kc = pool_data; 20862306a36Sopenharmony_ci struct ppl_io_unit *io; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci io = kmem_cache_alloc(kc, gfp_mask); 21162306a36Sopenharmony_ci if (!io) 21262306a36Sopenharmony_ci return NULL; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci io->header_page = alloc_page(gfp_mask); 21562306a36Sopenharmony_ci if (!io->header_page) { 21662306a36Sopenharmony_ci kmem_cache_free(kc, io); 21762306a36Sopenharmony_ci return NULL; 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci 
22062306a36Sopenharmony_ci return io; 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cistatic void ppl_io_pool_free(void *element, void *pool_data) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci struct kmem_cache *kc = pool_data; 22662306a36Sopenharmony_ci struct ppl_io_unit *io = element; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci __free_page(io->header_page); 22962306a36Sopenharmony_ci kmem_cache_free(kc, io); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_cistatic struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, 23362306a36Sopenharmony_ci struct stripe_head *sh) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 23662306a36Sopenharmony_ci struct ppl_io_unit *io; 23762306a36Sopenharmony_ci struct ppl_header *pplhdr; 23862306a36Sopenharmony_ci struct page *header_page; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT); 24162306a36Sopenharmony_ci if (!io) 24262306a36Sopenharmony_ci return NULL; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci header_page = io->header_page; 24562306a36Sopenharmony_ci memset(io, 0, sizeof(*io)); 24662306a36Sopenharmony_ci io->header_page = header_page; 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci io->log = log; 24962306a36Sopenharmony_ci INIT_LIST_HEAD(&io->log_sibling); 25062306a36Sopenharmony_ci INIT_LIST_HEAD(&io->stripe_list); 25162306a36Sopenharmony_ci atomic_set(&io->pending_stripes, 0); 25262306a36Sopenharmony_ci atomic_set(&io->pending_flushes, 0); 25362306a36Sopenharmony_ci bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS, 25462306a36Sopenharmony_ci REQ_OP_WRITE | REQ_FUA); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci pplhdr = page_address(io->header_page); 25762306a36Sopenharmony_ci clear_page(pplhdr); 25862306a36Sopenharmony_ci memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); 
25962306a36Sopenharmony_ci pplhdr->signature = cpu_to_le32(ppl_conf->signature); 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci io->seq = atomic64_add_return(1, &ppl_conf->seq); 26262306a36Sopenharmony_ci pplhdr->generation = cpu_to_le64(io->seq); 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci return io; 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_cistatic int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) 26862306a36Sopenharmony_ci{ 26962306a36Sopenharmony_ci struct ppl_io_unit *io = log->current_io; 27062306a36Sopenharmony_ci struct ppl_header_entry *e = NULL; 27162306a36Sopenharmony_ci struct ppl_header *pplhdr; 27262306a36Sopenharmony_ci int i; 27362306a36Sopenharmony_ci sector_t data_sector = 0; 27462306a36Sopenharmony_ci int data_disks = 0; 27562306a36Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci /* check if current io_unit is full */ 28062306a36Sopenharmony_ci if (io && (io->pp_size == log->entry_space || 28162306a36Sopenharmony_ci io->entries_count == PPL_HDR_MAX_ENTRIES)) { 28262306a36Sopenharmony_ci pr_debug("%s: add io_unit blocked by seq: %llu\n", 28362306a36Sopenharmony_ci __func__, io->seq); 28462306a36Sopenharmony_ci io = NULL; 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci /* add a new unit if there is none or the current is full */ 28862306a36Sopenharmony_ci if (!io) { 28962306a36Sopenharmony_ci io = ppl_new_iounit(log, sh); 29062306a36Sopenharmony_ci if (!io) 29162306a36Sopenharmony_ci return -ENOMEM; 29262306a36Sopenharmony_ci spin_lock_irq(&log->io_list_lock); 29362306a36Sopenharmony_ci list_add_tail(&io->log_sibling, &log->io_list); 29462306a36Sopenharmony_ci spin_unlock_irq(&log->io_list_lock); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci 
log->current_io = io; 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 30062306a36Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) { 30362306a36Sopenharmony_ci if (!data_disks || dev->sector < data_sector) 30462306a36Sopenharmony_ci data_sector = dev->sector; 30562306a36Sopenharmony_ci data_disks++; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci } 30862306a36Sopenharmony_ci BUG_ON(!data_disks); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci pr_debug("%s: seq: %llu data_sector: %llu data_disks: %d\n", __func__, 31162306a36Sopenharmony_ci io->seq, (unsigned long long)data_sector, data_disks); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci pplhdr = page_address(io->header_page); 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (io->entries_count > 0) { 31662306a36Sopenharmony_ci struct ppl_header_entry *last = 31762306a36Sopenharmony_ci &pplhdr->entries[io->entries_count - 1]; 31862306a36Sopenharmony_ci struct stripe_head *sh_last = list_last_entry( 31962306a36Sopenharmony_ci &io->stripe_list, struct stripe_head, log_list); 32062306a36Sopenharmony_ci u64 data_sector_last = le64_to_cpu(last->data_sector); 32162306a36Sopenharmony_ci u32 data_size_last = le32_to_cpu(last->data_size); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* 32462306a36Sopenharmony_ci * Check if we can append the stripe to the last entry. It must 32562306a36Sopenharmony_ci * be just after the last logged stripe and write to the same 32662306a36Sopenharmony_ci * disks. Use bit shift and logarithm to avoid 64-bit division. 
32762306a36Sopenharmony_ci */ 32862306a36Sopenharmony_ci if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) && 32962306a36Sopenharmony_ci (data_sector >> ilog2(conf->chunk_sectors) == 33062306a36Sopenharmony_ci data_sector_last >> ilog2(conf->chunk_sectors)) && 33162306a36Sopenharmony_ci ((data_sector - data_sector_last) * data_disks == 33262306a36Sopenharmony_ci data_size_last >> 9)) 33362306a36Sopenharmony_ci e = last; 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci if (!e) { 33762306a36Sopenharmony_ci e = &pplhdr->entries[io->entries_count++]; 33862306a36Sopenharmony_ci e->data_sector = cpu_to_le64(data_sector); 33962306a36Sopenharmony_ci e->parity_disk = cpu_to_le32(sh->pd_idx); 34062306a36Sopenharmony_ci e->checksum = cpu_to_le32(~0); 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci /* don't write any PP if full stripe write */ 34662306a36Sopenharmony_ci if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) { 34762306a36Sopenharmony_ci le32_add_cpu(&e->pp_size, PAGE_SIZE); 34862306a36Sopenharmony_ci io->pp_size += PAGE_SIZE; 34962306a36Sopenharmony_ci e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum), 35062306a36Sopenharmony_ci page_address(sh->ppl_page), 35162306a36Sopenharmony_ci PAGE_SIZE)); 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci list_add_tail(&sh->log_list, &io->stripe_list); 35562306a36Sopenharmony_ci atomic_inc(&io->pending_stripes); 35662306a36Sopenharmony_ci sh->ppl_io = io; 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci return 0; 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ciint ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci struct ppl_conf *ppl_conf = conf->log_private; 36462306a36Sopenharmony_ci struct 
ppl_io_unit *io = sh->ppl_io; 36562306a36Sopenharmony_ci struct ppl_log *log; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page || 36862306a36Sopenharmony_ci !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) || 36962306a36Sopenharmony_ci !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) { 37062306a36Sopenharmony_ci clear_bit(STRIPE_LOG_TRAPPED, &sh->state); 37162306a36Sopenharmony_ci return -EAGAIN; 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci log = &ppl_conf->child_logs[sh->pd_idx]; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci mutex_lock(&log->io_mutex); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) { 37962306a36Sopenharmony_ci mutex_unlock(&log->io_mutex); 38062306a36Sopenharmony_ci return -EAGAIN; 38162306a36Sopenharmony_ci } 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci set_bit(STRIPE_LOG_TRAPPED, &sh->state); 38462306a36Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 38562306a36Sopenharmony_ci atomic_inc(&sh->count); 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci if (ppl_log_stripe(log, sh)) { 38862306a36Sopenharmony_ci spin_lock_irq(&ppl_conf->no_mem_stripes_lock); 38962306a36Sopenharmony_ci list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes); 39062306a36Sopenharmony_ci spin_unlock_irq(&ppl_conf->no_mem_stripes_lock); 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci mutex_unlock(&log->io_mutex); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci return 0; 39662306a36Sopenharmony_ci} 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_cistatic void ppl_log_endio(struct bio *bio) 39962306a36Sopenharmony_ci{ 40062306a36Sopenharmony_ci struct ppl_io_unit *io = bio->bi_private; 40162306a36Sopenharmony_ci struct ppl_log *log = io->log; 40262306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 
40362306a36Sopenharmony_ci struct stripe_head *sh, *next; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci pr_debug("%s: seq: %llu\n", __func__, io->seq); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci if (bio->bi_status) 40862306a36Sopenharmony_ci md_error(ppl_conf->mddev, log->rdev); 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { 41162306a36Sopenharmony_ci list_del_init(&sh->log_list); 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 41462306a36Sopenharmony_ci raid5_release_stripe(sh); 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci pr_debug("%s: seq: %llu size: %u sector: %llu dev: %pg\n", 42162306a36Sopenharmony_ci __func__, io->seq, bio->bi_iter.bi_size, 42262306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector, 42362306a36Sopenharmony_ci bio->bi_bdev); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci submit_bio(bio); 42662306a36Sopenharmony_ci} 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_cistatic void ppl_submit_iounit(struct ppl_io_unit *io) 42962306a36Sopenharmony_ci{ 43062306a36Sopenharmony_ci struct ppl_log *log = io->log; 43162306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 43262306a36Sopenharmony_ci struct ppl_header *pplhdr = page_address(io->header_page); 43362306a36Sopenharmony_ci struct bio *bio = &io->bio; 43462306a36Sopenharmony_ci struct stripe_head *sh; 43562306a36Sopenharmony_ci int i; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci bio->bi_private = io; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) { 44062306a36Sopenharmony_ci ppl_log_endio(bio); 44162306a36Sopenharmony_ci return; 44262306a36Sopenharmony_ci } 
44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci for (i = 0; i < io->entries_count; i++) { 44562306a36Sopenharmony_ci struct ppl_header_entry *e = &pplhdr->entries[i]; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci pr_debug("%s: seq: %llu entry: %d data_sector: %llu pp_size: %u data_size: %u\n", 44862306a36Sopenharmony_ci __func__, io->seq, i, le64_to_cpu(e->data_sector), 44962306a36Sopenharmony_ci le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size)); 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >> 45262306a36Sopenharmony_ci ilog2(ppl_conf->block_size >> 9)); 45362306a36Sopenharmony_ci e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum)); 45462306a36Sopenharmony_ci } 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci pplhdr->entries_count = cpu_to_le32(io->entries_count); 45762306a36Sopenharmony_ci pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci /* Rewind the buffer if current PPL is larger then remaining space */ 46062306a36Sopenharmony_ci if (log->use_multippl && 46162306a36Sopenharmony_ci log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector < 46262306a36Sopenharmony_ci (PPL_HEADER_SIZE + io->pp_size) >> 9) 46362306a36Sopenharmony_ci log->next_io_sector = log->rdev->ppl.sector; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci bio->bi_end_io = ppl_log_endio; 46762306a36Sopenharmony_ci bio->bi_iter.bi_sector = log->next_io_sector; 46862306a36Sopenharmony_ci __bio_add_page(bio, io->header_page, PAGE_SIZE, 0); 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci pr_debug("%s: log->current_io_sector: %llu\n", __func__, 47162306a36Sopenharmony_ci (unsigned long long)log->next_io_sector); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (log->use_multippl) 47462306a36Sopenharmony_ci log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9; 
47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci WARN_ON(log->disk_flush_bitmap != 0); 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci list_for_each_entry(sh, &io->stripe_list, log_list) { 47962306a36Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 48062306a36Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci if ((ppl_conf->child_logs[i].wb_cache_on) && 48362306a36Sopenharmony_ci (test_bit(R5_Wantwrite, &dev->flags))) { 48462306a36Sopenharmony_ci set_bit(i, &log->disk_flush_bitmap); 48562306a36Sopenharmony_ci } 48662306a36Sopenharmony_ci } 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci /* entries for full stripe writes have no partial parity */ 48962306a36Sopenharmony_ci if (test_bit(STRIPE_FULL_WRITE, &sh->state)) 49062306a36Sopenharmony_ci continue; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) { 49362306a36Sopenharmony_ci struct bio *prev = bio; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS, 49662306a36Sopenharmony_ci prev->bi_opf, GFP_NOIO, 49762306a36Sopenharmony_ci &ppl_conf->bs); 49862306a36Sopenharmony_ci bio->bi_iter.bi_sector = bio_end_sector(prev); 49962306a36Sopenharmony_ci __bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci bio_chain(bio, prev); 50262306a36Sopenharmony_ci ppl_submit_iounit_bio(io, prev); 50362306a36Sopenharmony_ci } 50462306a36Sopenharmony_ci } 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci ppl_submit_iounit_bio(io, bio); 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_cistatic void ppl_submit_current_io(struct ppl_log *log) 51062306a36Sopenharmony_ci{ 51162306a36Sopenharmony_ci struct ppl_io_unit *io; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci spin_lock_irq(&log->io_list_lock); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci io = 
list_first_entry_or_null(&log->io_list, struct ppl_io_unit, 51662306a36Sopenharmony_ci log_sibling); 51762306a36Sopenharmony_ci if (io && io->submitted) 51862306a36Sopenharmony_ci io = NULL; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci spin_unlock_irq(&log->io_list_lock); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci if (io) { 52362306a36Sopenharmony_ci io->submitted = true; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci if (io == log->current_io) 52662306a36Sopenharmony_ci log->current_io = NULL; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci ppl_submit_iounit(io); 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci} 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_civoid ppl_write_stripe_run(struct r5conf *conf) 53362306a36Sopenharmony_ci{ 53462306a36Sopenharmony_ci struct ppl_conf *ppl_conf = conf->log_private; 53562306a36Sopenharmony_ci struct ppl_log *log; 53662306a36Sopenharmony_ci int i; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci for (i = 0; i < ppl_conf->count; i++) { 53962306a36Sopenharmony_ci log = &ppl_conf->child_logs[i]; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci mutex_lock(&log->io_mutex); 54262306a36Sopenharmony_ci ppl_submit_current_io(log); 54362306a36Sopenharmony_ci mutex_unlock(&log->io_mutex); 54462306a36Sopenharmony_ci } 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_cistatic void ppl_io_unit_finished(struct ppl_io_unit *io) 54862306a36Sopenharmony_ci{ 54962306a36Sopenharmony_ci struct ppl_log *log = io->log; 55062306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 55162306a36Sopenharmony_ci struct r5conf *conf = ppl_conf->mddev->private; 55262306a36Sopenharmony_ci unsigned long flags; 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci pr_debug("%s: seq: %llu\n", __func__, io->seq); 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci local_irq_save(flags); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci 
spin_lock(&log->io_list_lock); 55962306a36Sopenharmony_ci list_del(&io->log_sibling); 56062306a36Sopenharmony_ci spin_unlock(&log->io_list_lock); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci mempool_free(io, &ppl_conf->io_pool); 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci spin_lock(&ppl_conf->no_mem_stripes_lock); 56562306a36Sopenharmony_ci if (!list_empty(&ppl_conf->no_mem_stripes)) { 56662306a36Sopenharmony_ci struct stripe_head *sh; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci sh = list_first_entry(&ppl_conf->no_mem_stripes, 56962306a36Sopenharmony_ci struct stripe_head, log_list); 57062306a36Sopenharmony_ci list_del_init(&sh->log_list); 57162306a36Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 57262306a36Sopenharmony_ci raid5_release_stripe(sh); 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci spin_unlock(&ppl_conf->no_mem_stripes_lock); 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci local_irq_restore(flags); 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci wake_up(&conf->wait_for_quiescent); 57962306a36Sopenharmony_ci} 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_cistatic void ppl_flush_endio(struct bio *bio) 58262306a36Sopenharmony_ci{ 58362306a36Sopenharmony_ci struct ppl_io_unit *io = bio->bi_private; 58462306a36Sopenharmony_ci struct ppl_log *log = io->log; 58562306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 58662306a36Sopenharmony_ci struct r5conf *conf = ppl_conf->mddev->private; 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev); 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci if (bio->bi_status) { 59162306a36Sopenharmony_ci struct md_rdev *rdev; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci rcu_read_lock(); 59462306a36Sopenharmony_ci rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio)); 59562306a36Sopenharmony_ci if (rdev) 59662306a36Sopenharmony_ci md_error(rdev->mddev, rdev); 59762306a36Sopenharmony_ci 
rcu_read_unlock(); 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci bio_put(bio); 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci if (atomic_dec_and_test(&io->pending_flushes)) { 60362306a36Sopenharmony_ci ppl_io_unit_finished(io); 60462306a36Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 60562306a36Sopenharmony_ci } 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic void ppl_do_flush(struct ppl_io_unit *io) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci struct ppl_log *log = io->log; 61162306a36Sopenharmony_ci struct ppl_conf *ppl_conf = log->ppl_conf; 61262306a36Sopenharmony_ci struct r5conf *conf = ppl_conf->mddev->private; 61362306a36Sopenharmony_ci int raid_disks = conf->raid_disks; 61462306a36Sopenharmony_ci int flushed_disks = 0; 61562306a36Sopenharmony_ci int i; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci atomic_set(&io->pending_flushes, raid_disks); 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) { 62062306a36Sopenharmony_ci struct md_rdev *rdev; 62162306a36Sopenharmony_ci struct block_device *bdev = NULL; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci rcu_read_lock(); 62462306a36Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].rdev); 62562306a36Sopenharmony_ci if (rdev && !test_bit(Faulty, &rdev->flags)) 62662306a36Sopenharmony_ci bdev = rdev->bdev; 62762306a36Sopenharmony_ci rcu_read_unlock(); 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci if (bdev) { 63062306a36Sopenharmony_ci struct bio *bio; 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci bio = bio_alloc_bioset(bdev, 0, 63362306a36Sopenharmony_ci REQ_OP_WRITE | REQ_PREFLUSH, 63462306a36Sopenharmony_ci GFP_NOIO, &ppl_conf->flush_bs); 63562306a36Sopenharmony_ci bio->bi_private = io; 63662306a36Sopenharmony_ci bio->bi_end_io = ppl_flush_endio; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci pr_debug("%s: dev: 
%ps\n", __func__, bio->bi_bdev); 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci submit_bio(bio); 64162306a36Sopenharmony_ci flushed_disks++; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci } 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci log->disk_flush_bitmap = 0; 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci for (i = flushed_disks ; i < raid_disks; i++) { 64862306a36Sopenharmony_ci if (atomic_dec_and_test(&io->pending_flushes)) 64962306a36Sopenharmony_ci ppl_io_unit_finished(io); 65062306a36Sopenharmony_ci } 65162306a36Sopenharmony_ci} 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_cistatic inline bool ppl_no_io_unit_submitted(struct r5conf *conf, 65462306a36Sopenharmony_ci struct ppl_log *log) 65562306a36Sopenharmony_ci{ 65662306a36Sopenharmony_ci struct ppl_io_unit *io; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit, 65962306a36Sopenharmony_ci log_sibling); 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci return !io || !io->submitted; 66262306a36Sopenharmony_ci} 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_civoid ppl_quiesce(struct r5conf *conf, int quiesce) 66562306a36Sopenharmony_ci{ 66662306a36Sopenharmony_ci struct ppl_conf *ppl_conf = conf->log_private; 66762306a36Sopenharmony_ci int i; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci if (quiesce) { 67062306a36Sopenharmony_ci for (i = 0; i < ppl_conf->count; i++) { 67162306a36Sopenharmony_ci struct ppl_log *log = &ppl_conf->child_logs[i]; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci spin_lock_irq(&log->io_list_lock); 67462306a36Sopenharmony_ci wait_event_lock_irq(conf->wait_for_quiescent, 67562306a36Sopenharmony_ci ppl_no_io_unit_submitted(conf, log), 67662306a36Sopenharmony_ci log->io_list_lock); 67762306a36Sopenharmony_ci spin_unlock_irq(&log->io_list_lock); 67862306a36Sopenharmony_ci } 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci} 68162306a36Sopenharmony_ci 
/*
 * ppl_handle_flush_request - handle a flush bio on an array that uses PPL
 * @bio: incoming bio
 *
 * An empty bio (bi_size == 0) is completed right here and 0 is returned.
 * For a bio that carries data, REQ_PREFLUSH is stripped from bi_opf and
 * -EAGAIN is returned.
 * NOTE(review): -EAGAIN presumably tells the caller to carry on and process
 * the bio as an ordinary write - confirm against the caller.
 */
int ppl_handle_flush_request(struct bio *bio)
{
	if (bio->bi_iter.bi_size == 0) {
		bio_endio(bio);
		return 0;
	}
	bio->bi_opf &= ~REQ_PREFLUSH;
	return -EAGAIN;
}

/*
 * ppl_stripe_write_finished - a logged stripe has been written to the array
 * @sh: stripe whose write completed
 *
 * Detaches @sh from its io_unit (sh->ppl_io is set to NULL) and drops one
 * pending_stripes reference. When the last reference is dropped the io_unit
 * is either passed to ppl_do_flush(), if its log has bits set in
 * disk_flush_bitmap, or finished directly. Does nothing for a stripe that has
 * no io_unit attached.
 */
void ppl_stripe_write_finished(struct stripe_head *sh)
{
	struct ppl_io_unit *io;

	io = sh->ppl_io;
	sh->ppl_io = NULL;

	if (io && atomic_dec_and_test(&io->pending_stripes)) {
		if (io->log->disk_flush_bitmap)
			ppl_do_flush(io);
		else
			ppl_io_unit_finished(io);
	}
}

/*
 * ppl_xor - xor the first @size bytes of @page2 into @page1
 * @size:  number of bytes to xor
 * @page1: destination, also passed as the first xor source
 * @page2: second xor source
 *
 * Submits the operation through async_xor() and waits for it with
 * async_tx_quiesce() before returning.
 */
static void ppl_xor(int size, struct page *page1, struct page *page2)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	struct page *xor_srcs[] = { page1, page2 };

	init_async_submit(&submit, ASYNC_TX_ACK|ASYNC_TX_XOR_DROP_DST,
			  NULL, NULL, NULL, NULL);
	tx = async_xor(page1, xor_srcs, 0, 2, size, &submit);

	async_tx_quiesce(&tx);
}

/*
 * PPL recovery strategy: xor partial parity and data from all modified data
 * disks within a stripe and write the result as the new stripe parity. If all
 * stripe data disks are modified (full stripe write), no partial parity is
 * available, so just xor the data disks.
 *
 * Recovery of a PPL entry shall occur only if all modified data disks are
 * available and read from all of them succeeds.
 *
 * A PPL entry applies to a stripe, partial parity size for an entry is at most
 * the size of the chunk. Examples of possible cases for a single entry:
 *
 * case 0: single data disk write:
 *   data0    data1    data2     ppl        parity
 * +--------+--------+--------+           +--------------------+
 * | ------ | ------ | ------ | +----+    | (no change)        |
 * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
 * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
 * | ------ | ------ | ------ | +----+    | (no change)        |
 * +--------+--------+--------+           +--------------------+
 * pp_size = data_size
 *
 * case 1: more than one data disk write:
 *   data0    data1    data2     ppl        parity
 * +--------+--------+--------+           +--------------------+
 * | ------ | ------ | ------ | +----+    | (no change)        |
 * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
 * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
 * | ------ | ------ | ------ | +----+    | (no change)        |
 * +--------+--------+--------+           +--------------------+
 * pp_size = data_size / modified_data_disks
 *
 * case 2: write to all data disks (also full stripe write):
 *   data0    data1    data2                parity
 * +--------+--------+--------+           +--------------------+
 * | ------ | ------ | ------ |           | (no change)        |
 * | -data- | -data- | -data- | --------> | xor all data       |
 * | ------ | ------ | ------ | --------> | (no change)        |
 * | ------ | ------ | ------ |           | (no change)        |
 * +--------+--------+--------+           +--------------------+
 * pp_size = 0
 *
 * The following cases are possible only in other implementations. The recovery
 * code can handle them, but they are not generated at runtime because they can
 * be reduced to cases 0, 1 and 2:
 *
 * case 3:
 *   data0    data1    data2     ppl        parity
 * +--------+--------+--------+ +----+    +--------------------+
 * | ------ | -data- | -data- | | pp |    | data1 ^ data2 ^ pp |
 * | ------ | -data- | -data- | | pp | -> | data1 ^ data2 ^ pp |
 * | -data- | -data- | -data- | | -- | -> | xor all data       |
 * | -data- | -data- | ------ | | pp |    | data0 ^ data1 ^ pp |
 * +--------+--------+--------+ +----+    +--------------------+
 * pp_size = chunk_size
 *
 * case 4:
 *   data0    data1    data2     ppl        parity
 * +--------+--------+--------+ +----+    +--------------------+
 * | ------ | -data- | ------ | | pp |    | data1 ^ pp         |
 * | ------ | ------ | ------ | | -- | -> | (no change)        |
 * | ------ | ------ | ------ | | -- | -> | (no change)        |
 * | -data- | ------ | ------ | | pp |    | data0 ^ pp         |
 * +--------+--------+--------+ +----+    +--------------------+
 * pp_size = chunk_size
 */
/*
 * ppl_recover_entry - recompute and rewrite parity for one PPL entry
 * @log:        child log (member disk) the entry was read from
 * @e:          header entry, fields in little-endian on-disk format
 * @ppl_sector: sector at which the partial parity of this entry starts
 *
 * For every block of the strip described by @e: xor together the data read
 * from each modified data disk and, when the entry has partial parity
 * (pp_size > 0), the matching block of partial parity, then write the result
 * to the parity disk. A block is skipped without writing parity if one of
 * its data disks is missing or not yet recovered up to that sector.
 *
 * Returns 0 on success, -ENOMEM if a scratch page cannot be allocated, or
 * -EIO after a failed read or write (the device is reported via md_error()).
 */
static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
			     sector_t ppl_sector)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct mddev *mddev = ppl_conf->mddev;
	struct r5conf *conf = mddev->private;
	int block_size = ppl_conf->block_size;
	struct page *page1;	/* xor accumulator, becomes the new parity */
	struct page *page2;	/* scratch buffer for each read */
	sector_t r_sector_first;
	sector_t r_sector_last;
	int strip_sectors;
	int data_disks;
	int i;
	int ret = 0;
	unsigned int pp_size = le32_to_cpu(e->pp_size);
	unsigned int data_size = le32_to_cpu(e->data_size);

	page1 = alloc_page(GFP_KERNEL);
	page2 = alloc_page(GFP_KERNEL);

	if (!page1 || !page2) {
		ret = -ENOMEM;
		goto out;
	}

	/* data_sector is stored in units of block_size, convert to sectors */
	r_sector_first = le64_to_cpu(e->data_sector) * (block_size >> 9);

	if ((pp_size >> 9) < conf->chunk_sectors) {
		if (pp_size > 0) {
			/* cases 0 and 1: pp_size = data_size / modified disks */
			data_disks = data_size / pp_size;
			strip_sectors = pp_size >> 9;
		} else {
			/* case 2: no partial parity, all data disks written */
			data_disks = conf->raid_disks - conf->max_degraded;
			strip_sectors = (data_size >> 9) / data_disks;
		}
		r_sector_last = r_sector_first +
				(data_disks - 1) * conf->chunk_sectors +
				strip_sectors;
	} else {
		/* cases 3 and 4: partial parity covers a whole chunk */
		data_disks = conf->raid_disks - conf->max_degraded;
		strip_sectors = conf->chunk_sectors;
		r_sector_last = r_sector_first + (data_size >> 9);
	}

	pr_debug("%s: array sector first: %llu last: %llu\n", __func__,
		 (unsigned long long)r_sector_first,
		 (unsigned long long)r_sector_last);

	/* if start and end is 4k aligned, use a 4k block */
	if (block_size == 512 &&
	    (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
	    (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
		block_size = RAID5_STRIPE_SIZE(conf);

	/* iterate through blocks in strip */
	for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
		bool update_parity = false;
		sector_t parity_sector;
		struct md_rdev *parity_rdev;
		struct stripe_head sh;
		int disk;
		int indent = 0;	/* only used to indent the debug output */

		pr_debug("%s:%*s iter %d start\n", __func__, indent, "", i);
		indent += 2;

		/* start every block from a zeroed accumulator */
		memset(page_address(page1), 0, PAGE_SIZE);

		/* iterate through data member disks */
		for (disk = 0; disk < data_disks; disk++) {
			int dd_idx;
			struct md_rdev *rdev;
			sector_t sector;
			sector_t r_sector = r_sector_first + i +
					    (disk * conf->chunk_sectors);

			pr_debug("%s:%*s data member disk %d start\n",
				 __func__, indent, "", disk);
			indent += 2;

			if (r_sector >= r_sector_last) {
				pr_debug("%s:%*s array sector %llu doesn't need parity update\n",
					 __func__, indent, "",
					 (unsigned long long)r_sector);
				indent -= 2;
				continue;
			}

			update_parity = true;

			/* map raid sector to member disk */
			sector = raid5_compute_sector(conf, r_sector, 0,
						      &dd_idx, NULL);
			pr_debug("%s:%*s processing array sector %llu => data member disk %d, sector %llu\n",
				 __func__, indent, "",
				 (unsigned long long)r_sector, dd_idx,
				 (unsigned long long)sector);

			/* Array has not started so rcu dereference is safe */
			rdev = rcu_dereference_protected(
					conf->disks[dd_idx].rdev, 1);
			if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
				      sector >= rdev->recovery_offset)) {
				pr_debug("%s:%*s data member disk %d missing\n",
					 __func__, indent, "", dd_idx);
				/* give up on this block, parity is left alone */
				update_parity = false;
				break;
			}

			pr_debug("%s:%*s reading data member disk %pg sector %llu\n",
				 __func__, indent, "", rdev->bdev,
				 (unsigned long long)sector);
			if (!sync_page_io(rdev, sector, block_size, page2,
					  REQ_OP_READ, false)) {
				md_error(mddev, rdev);
				pr_debug("%s:%*s read failed!\n", __func__,
					 indent, "");
				ret = -EIO;
				goto out;
			}

			ppl_xor(block_size, page1, page2);

			indent -= 2;
		}

		if (!update_parity)
			continue;

		if (pp_size > 0) {
			pr_debug("%s:%*s reading pp disk sector %llu\n",
				 __func__, indent, "",
				 (unsigned long long)(ppl_sector + i));
			if (!sync_page_io(log->rdev,
					ppl_sector - log->rdev->data_offset + i,
					block_size, page2, REQ_OP_READ,
					false)) {
				pr_debug("%s:%*s read failed!\n", __func__,
					 indent, "");
				md_error(mddev, log->rdev);
				ret = -EIO;
				goto out;
			}

			ppl_xor(block_size, page1, page2);
		}

		/* map raid sector to parity disk */
		parity_sector = raid5_compute_sector(conf, r_sector_first + i,
				0, &disk, &sh);
		/* the entry must belong to the parity disk computed here */
		BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));

		/* Array has not started so rcu dereference is safe */
		parity_rdev = rcu_dereference_protected(
					conf->disks[sh.pd_idx].rdev, 1);

		/* ... and that parity disk must be the one holding this log */
		BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
		pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
			 __func__, indent, "",
			 (unsigned long long)parity_sector,
			 parity_rdev->bdev);
		if (!sync_page_io(parity_rdev, parity_sector, block_size,
				  page1, REQ_OP_WRITE, false)) {
			pr_debug("%s:%*s parity write error!\n", __func__,
				 indent, "");
			md_error(mddev, parity_rdev);
			ret = -EIO;
			goto out;
		}
	}
out:
	if (page1)
		__free_page(page1);
	if (page2)
		__free_page(page2);
	return ret;
}

/*
 * ppl_recover - replay every entry of one PPL header
 * @log:    child log (member disk) the header was read from
 * @pplhdr: header to replay
 * @offset: offset in sectors of that header from rdev->ppl.sector
 *
 * For each entry the partial parity is read back page by page and its crc32c
 * is compared with the checksum stored in the entry. Entries that match are
 * handed to ppl_recover_entry() and counted in recovered_entries; entries
 * that do not match are skipped and counted in mismatch_count. After the
 * last entry the disk cache is flushed.
 *
 * Returns 0 on success, -ENOMEM, -EIO on a failed partial parity read, the
 * error of ppl_recover_entry(), or the result of blkdev_issue_flush().
 */
static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
		       sector_t offset)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct md_rdev *rdev = log->rdev;
	struct mddev *mddev = rdev->mddev;
	/* partial parity data starts right behind the header */
	sector_t ppl_sector = rdev->ppl.sector + offset +
			      (PPL_HEADER_SIZE >> 9);
	struct page *page;
	int i;
	int ret = 0;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* iterate through all PPL entries saved */
	for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) {
		struct ppl_header_entry *e = &pplhdr->entries[i];
		u32 pp_size = le32_to_cpu(e->pp_size);
		sector_t sector = ppl_sector;
		/* saved now, pp_size is consumed by the read loop below */
		int ppl_entry_sectors = pp_size >> 9;
		u32 crc, crc_stored;

		pr_debug("%s: disk: %d entry: %d ppl_sector: %llu pp_size: %u\n",
			 __func__, rdev->raid_disk, i,
			 (unsigned long long)ppl_sector, pp_size);

		crc = ~0;
		crc_stored = le32_to_cpu(e->checksum);

		/* read partial parity for this entry and calculate its checksum */
		while (pp_size) {
			int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;

			if (!sync_page_io(rdev, sector - rdev->data_offset,
					s, page, REQ_OP_READ, false)) {
				md_error(mddev, rdev);
				ret = -EIO;
				goto out;
			}

			crc = crc32c_le(crc, page_address(page), s);

			pp_size -= s;
			sector += s >> 9;
		}

		crc = ~crc;

		if (crc != crc_stored) {
			/*
			 * Don't recover this entry if the checksum does not
			 * match, but keep going and try to recover other
			 * entries.
			 */
			pr_debug("%s: ppl entry crc does not match: stored: 0x%x calculated: 0x%x\n",
				 __func__, crc_stored, crc);
			ppl_conf->mismatch_count++;
		} else {
			ret = ppl_recover_entry(log, e, ppl_sector);
			if (ret)
				goto out;
			ppl_conf->recovered_entries++;
		}

		/* the next entry's partial parity follows this one's */
		ppl_sector += ppl_entry_sectors;
	}

	/* flush the disk cache after recovery if necessary */
	ret = blkdev_issue_flush(rdev->bdev);
out:
	__free_page(page);
	return ret;
}

/*
 * ppl_write_empty_header - reset the PPL area of a member disk
 * @log: child log whose rdev gets the empty header
 *
 * Zeroes the PPL area on disk (the result of blkdev_issue_zeroout() is not
 * checked) and then writes a header with no entries: a zeroed page whose
 * reserved area is filled with 0xff and which carries the configured
 * signature and a checksum over the whole page. The header is written with
 * REQ_SYNC | REQ_FUA.
 *
 * Returns 0 on success, -ENOMEM if the page cannot be allocated, or -EIO if
 * the header write fails (the rdev is then reported via md_error()).
 */
static int ppl_write_empty_header(struct ppl_log *log)
{
	struct page *page;
	struct ppl_header *pplhdr;
	struct md_rdev *rdev = log->rdev;
	int ret = 0;

	pr_debug("%s: disk: %d ppl_sector: %llu\n", __func__,
		 rdev->raid_disk, (unsigned long long)rdev->ppl.sector);

	page = alloc_page(GFP_NOIO | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	pplhdr = page_address(page);
	/* zero out PPL space to avoid collision with old PPLs */
	blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
			    log->rdev->ppl.size, GFP_NOIO, 0);
	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
	/* checksum is computed while the checksum field itself is still 0 */
	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));

	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
			  REQ_FUA, false)) {
		md_error(rdev->mddev, rdev);
		ret = -EIO;
	}

	__free_page(page);
	return ret;
}

/*
 * ppl_load_distributed - find the latest PPL on one member disk and load it
 * @log: child log of the member disk
 *
 * Walks the PPL area header by header. Each header is validated by its
 * checksum and, unless the array uses external metadata, by its signature.
 * The walk stops at the first invalid header or at the first header whose
 * generation is lower than the previous one; the previous header is then
 * taken as the latest. Two pages are used alternately so that the previous
 * header stays valid while the next one is being read.
 *
 * When the array is being started (mddev->pers is not set yet) and is dirty
 * (recovery_cp != MaxSector) the latest header is replayed with
 * ppl_recover(); when the array is being started and no error occurred an
 * empty header is written afterwards.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -EIO, or whatever
 * ppl_recover() / ppl_write_empty_header() returned).
 */
static int ppl_load_distributed(struct ppl_log *log)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct md_rdev *rdev = log->rdev;
	struct mddev *mddev = rdev->mddev;
	struct page *page, *page2;
	struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
	u32 crc, crc_stored;
	u32 signature;
	int ret = 0, i;
	sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;

	pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
	/* read PPL headers, find the recent one */
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	page2 = alloc_page(GFP_KERNEL);
	if (!page2) {
		__free_page(page);
		return -ENOMEM;
	}

	/* searching ppl area for latest ppl */
	while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
		if (!sync_page_io(rdev,
				  rdev->ppl.sector - rdev->data_offset +
				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
				  false)) {
			md_error(mddev, rdev);
			ret = -EIO;
			/* if not able to read - don't recover any PPL */
			pplhdr = NULL;
			break;
		}
		pplhdr = page_address(page);

		/* check header validity */
		crc_stored = le32_to_cpu(pplhdr->checksum);
		/* the checksum was computed with this field zeroed */
		pplhdr->checksum = 0;
		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);

		if (crc_stored != crc) {
			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
				 __func__, crc_stored, crc,
				 (unsigned long long)pplhdr_offset);
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		signature = le32_to_cpu(pplhdr->signature);

		if (mddev->external) {
			/*
			 * For external metadata the header signature is set and
			 * validated in userspace.
			 */
			ppl_conf->signature = signature;
		} else if (ppl_conf->signature != signature) {
			pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
				 __func__, signature, ppl_conf->signature,
				 (unsigned long long)pplhdr_offset);
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
		    le64_to_cpu(pplhdr->generation)) {
			/* previous was newest */
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		prev_pplhdr_offset = pplhdr_offset;
		prev_pplhdr = pplhdr;

		/* keep this header intact, read the next into the other page */
		swap(page, page2);

		/* calculate next potential ppl offset */
		for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
			pplhdr_offset +=
			    le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
		pplhdr_offset += PPL_HEADER_SIZE >> 9;
	}

	/* no valid ppl found */
	if (!pplhdr)
		ppl_conf->mismatch_count++;
	else
		pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
		    __func__, (unsigned long long)pplhdr_offset,
		    le64_to_cpu(pplhdr->generation));

	/* attempt to recover from log if we are starting a dirty array */
	if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
		ret = ppl_recover(log, pplhdr, pplhdr_offset);

	/* write empty header if we are starting the array */
	if (!ret && !mddev->pers)
		ret = ppl_write_empty_header(log);

	__free_page(page);
	__free_page(page2);

	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
		 __func__, ret, ppl_conf->mismatch_count,
		 ppl_conf->recovered_entries);
	return ret;
}

/*
 * ppl_load - load the PPL of every present member disk
 * @ppl_conf: PPL configuration with the child_logs array
 *
 * Calls ppl_load_distributed() for each child log that has an rdev and stops
 * at the first error. For external metadata it additionally checks that the
 * signature picked up from the headers is identical on all drives.
 *
 * Returns 0 on success, the error of ppl_load_distributed(), or -EINVAL if
 * the signatures differ between drives.
 */
static int ppl_load(struct ppl_conf *ppl_conf)
{
	int ret = 0;
	u32 signature = 0;
	bool signature_set = false;
	int i;

	for (i = 0; i < ppl_conf->count; i++) {
		struct ppl_log *log = &ppl_conf->child_logs[i];

		/* skip missing drive */
		if (!log->rdev)
			continue;

		ret = ppl_load_distributed(log);
		if (ret)
			break;

		/*
		 * For external metadata we can't check if the signature is
		 * correct on a single drive, but we can check if it is the same
		 * on all drives.
		 */
		if (ppl_conf->mddev->external) {
			if (!signature_set) {
				signature = ppl_conf->signature;
				signature_set = true;
			} else if (signature != ppl_conf->signature) {
				pr_warn("md/raid:%s: PPL header signature does not match on all member drives\n",
					mdname(ppl_conf->mddev));
				ret = -EINVAL;
				break;
			}
		}
	}

	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
		 __func__, ret, ppl_conf->mismatch_count,
		 ppl_conf->recovered_entries);
	return ret;
}

/*
 * __ppl_exit_log - release everything owned by a ppl_conf
 * @ppl_conf: configuration to tear down; freed on return
 *
 * Clears the MD_HAS_PPL and MD_HAS_MULTIPLE_PPLS flags of the mddev, frees
 * the child_logs array, tears down both biosets, the io_unit mempool and its
 * kmem cache, and finally frees @ppl_conf itself.
 */
static void __ppl_exit_log(struct ppl_conf *ppl_conf)
{
	clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
	clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);

	kfree(ppl_conf->child_logs);

	bioset_exit(&ppl_conf->bs);
	bioset_exit(&ppl_conf->flush_bs);
	mempool_exit(&ppl_conf->io_pool);
	kmem_cache_destroy(ppl_conf->io_kc);

	kfree(ppl_conf);
}

void ppl_exit_log(struct r5conf *conf)
{
	struct ppl_conf *ppl_conf = conf->log_private;

	if
(ppl_conf) { 125062306a36Sopenharmony_ci __ppl_exit_log(ppl_conf); 125162306a36Sopenharmony_ci conf->log_private = NULL; 125262306a36Sopenharmony_ci } 125362306a36Sopenharmony_ci} 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_cistatic int ppl_validate_rdev(struct md_rdev *rdev) 125662306a36Sopenharmony_ci{ 125762306a36Sopenharmony_ci int ppl_data_sectors; 125862306a36Sopenharmony_ci int ppl_size_new; 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci /* 126162306a36Sopenharmony_ci * The configured PPL size must be enough to store 126262306a36Sopenharmony_ci * the header and (at the very least) partial parity 126362306a36Sopenharmony_ci * for one stripe. Round it down to ensure the data 126462306a36Sopenharmony_ci * space is cleanly divisible by stripe size. 126562306a36Sopenharmony_ci */ 126662306a36Sopenharmony_ci ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci if (ppl_data_sectors > 0) 126962306a36Sopenharmony_ci ppl_data_sectors = rounddown(ppl_data_sectors, 127062306a36Sopenharmony_ci RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private)); 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci if (ppl_data_sectors <= 0) { 127362306a36Sopenharmony_ci pr_warn("md/raid:%s: PPL space too small on %pg\n", 127462306a36Sopenharmony_ci mdname(rdev->mddev), rdev->bdev); 127562306a36Sopenharmony_ci return -ENOSPC; 127662306a36Sopenharmony_ci } 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci ppl_size_new = ppl_data_sectors + (PPL_HEADER_SIZE >> 9); 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci if ((rdev->ppl.sector < rdev->data_offset && 128162306a36Sopenharmony_ci rdev->ppl.sector + ppl_size_new > rdev->data_offset) || 128262306a36Sopenharmony_ci (rdev->ppl.sector >= rdev->data_offset && 128362306a36Sopenharmony_ci rdev->data_offset + rdev->sectors > rdev->ppl.sector)) { 128462306a36Sopenharmony_ci pr_warn("md/raid:%s: PPL space overlaps with data on %pg\n", 
128562306a36Sopenharmony_ci mdname(rdev->mddev), rdev->bdev); 128662306a36Sopenharmony_ci return -EINVAL; 128762306a36Sopenharmony_ci } 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci if (!rdev->mddev->external && 129062306a36Sopenharmony_ci ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) || 129162306a36Sopenharmony_ci (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) { 129262306a36Sopenharmony_ci pr_warn("md/raid:%s: PPL space overlaps with superblock on %pg\n", 129362306a36Sopenharmony_ci mdname(rdev->mddev), rdev->bdev); 129462306a36Sopenharmony_ci return -EINVAL; 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci rdev->ppl.size = ppl_size_new; 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci return 0; 130062306a36Sopenharmony_ci} 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_cistatic void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) 130362306a36Sopenharmony_ci{ 130462306a36Sopenharmony_ci if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + 130562306a36Sopenharmony_ci PPL_HEADER_SIZE) * 2) { 130662306a36Sopenharmony_ci log->use_multippl = true; 130762306a36Sopenharmony_ci set_bit(MD_HAS_MULTIPLE_PPLS, 130862306a36Sopenharmony_ci &log->ppl_conf->mddev->flags); 130962306a36Sopenharmony_ci log->entry_space = PPL_SPACE_SIZE; 131062306a36Sopenharmony_ci } else { 131162306a36Sopenharmony_ci log->use_multippl = false; 131262306a36Sopenharmony_ci log->entry_space = (log->rdev->ppl.size << 9) - 131362306a36Sopenharmony_ci PPL_HEADER_SIZE; 131462306a36Sopenharmony_ci } 131562306a36Sopenharmony_ci log->next_io_sector = rdev->ppl.sector; 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci if (bdev_write_cache(rdev->bdev)) 131862306a36Sopenharmony_ci log->wb_cache_on = true; 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ciint ppl_init_log(struct r5conf *conf) 132262306a36Sopenharmony_ci{ 132362306a36Sopenharmony_ci struct ppl_conf 
*ppl_conf; 132462306a36Sopenharmony_ci struct mddev *mddev = conf->mddev; 132562306a36Sopenharmony_ci int ret = 0; 132662306a36Sopenharmony_ci int max_disks; 132762306a36Sopenharmony_ci int i; 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n", 133062306a36Sopenharmony_ci mdname(conf->mddev)); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci if (PAGE_SIZE != 4096) 133362306a36Sopenharmony_ci return -EINVAL; 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci if (mddev->level != 5) { 133662306a36Sopenharmony_ci pr_warn("md/raid:%s PPL is not compatible with raid level %d\n", 133762306a36Sopenharmony_ci mdname(mddev), mddev->level); 133862306a36Sopenharmony_ci return -EINVAL; 133962306a36Sopenharmony_ci } 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci if (mddev->bitmap_info.file || mddev->bitmap_info.offset) { 134262306a36Sopenharmony_ci pr_warn("md/raid:%s PPL is not compatible with bitmap\n", 134362306a36Sopenharmony_ci mdname(mddev)); 134462306a36Sopenharmony_ci return -EINVAL; 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 134862306a36Sopenharmony_ci pr_warn("md/raid:%s PPL is not compatible with journal\n", 134962306a36Sopenharmony_ci mdname(mddev)); 135062306a36Sopenharmony_ci return -EINVAL; 135162306a36Sopenharmony_ci } 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) * 135462306a36Sopenharmony_ci BITS_PER_BYTE; 135562306a36Sopenharmony_ci if (conf->raid_disks > max_disks) { 135662306a36Sopenharmony_ci pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n", 135762306a36Sopenharmony_ci mdname(mddev), max_disks); 135862306a36Sopenharmony_ci return -EINVAL; 135962306a36Sopenharmony_ci } 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL); 
136262306a36Sopenharmony_ci if (!ppl_conf) 136362306a36Sopenharmony_ci return -ENOMEM; 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_ci ppl_conf->mddev = mddev; 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0); 136862306a36Sopenharmony_ci if (!ppl_conf->io_kc) { 136962306a36Sopenharmony_ci ret = -ENOMEM; 137062306a36Sopenharmony_ci goto err; 137162306a36Sopenharmony_ci } 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc, 137462306a36Sopenharmony_ci ppl_io_pool_free, ppl_conf->io_kc); 137562306a36Sopenharmony_ci if (ret) 137662306a36Sopenharmony_ci goto err; 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS); 137962306a36Sopenharmony_ci if (ret) 138062306a36Sopenharmony_ci goto err; 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_ci ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0); 138362306a36Sopenharmony_ci if (ret) 138462306a36Sopenharmony_ci goto err; 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci ppl_conf->count = conf->raid_disks; 138762306a36Sopenharmony_ci ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log), 138862306a36Sopenharmony_ci GFP_KERNEL); 138962306a36Sopenharmony_ci if (!ppl_conf->child_logs) { 139062306a36Sopenharmony_ci ret = -ENOMEM; 139162306a36Sopenharmony_ci goto err; 139262306a36Sopenharmony_ci } 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci atomic64_set(&ppl_conf->seq, 0); 139562306a36Sopenharmony_ci INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); 139662306a36Sopenharmony_ci spin_lock_init(&ppl_conf->no_mem_stripes_lock); 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci if (!mddev->external) { 139962306a36Sopenharmony_ci ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); 140062306a36Sopenharmony_ci ppl_conf->block_size = 512; 
140162306a36Sopenharmony_ci } else { 140262306a36Sopenharmony_ci ppl_conf->block_size = queue_logical_block_size(mddev->queue); 140362306a36Sopenharmony_ci } 140462306a36Sopenharmony_ci 140562306a36Sopenharmony_ci for (i = 0; i < ppl_conf->count; i++) { 140662306a36Sopenharmony_ci struct ppl_log *log = &ppl_conf->child_logs[i]; 140762306a36Sopenharmony_ci /* Array has not started so rcu dereference is safe */ 140862306a36Sopenharmony_ci struct md_rdev *rdev = 140962306a36Sopenharmony_ci rcu_dereference_protected(conf->disks[i].rdev, 1); 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci mutex_init(&log->io_mutex); 141262306a36Sopenharmony_ci spin_lock_init(&log->io_list_lock); 141362306a36Sopenharmony_ci INIT_LIST_HEAD(&log->io_list); 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci log->ppl_conf = ppl_conf; 141662306a36Sopenharmony_ci log->rdev = rdev; 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci if (rdev) { 141962306a36Sopenharmony_ci ret = ppl_validate_rdev(rdev); 142062306a36Sopenharmony_ci if (ret) 142162306a36Sopenharmony_ci goto err; 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci ppl_init_child_log(log, rdev); 142462306a36Sopenharmony_ci } 142562306a36Sopenharmony_ci } 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci /* load and possibly recover the logs from the member disks */ 142862306a36Sopenharmony_ci ret = ppl_load(ppl_conf); 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ci if (ret) { 143162306a36Sopenharmony_ci goto err; 143262306a36Sopenharmony_ci } else if (!mddev->pers && mddev->recovery_cp == 0 && 143362306a36Sopenharmony_ci ppl_conf->recovered_entries > 0 && 143462306a36Sopenharmony_ci ppl_conf->mismatch_count == 0) { 143562306a36Sopenharmony_ci /* 143662306a36Sopenharmony_ci * If we are starting a dirty array and the recovery succeeds 143762306a36Sopenharmony_ci * without any issues, set the array as clean. 
143862306a36Sopenharmony_ci */ 143962306a36Sopenharmony_ci mddev->recovery_cp = MaxSector; 144062306a36Sopenharmony_ci set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); 144162306a36Sopenharmony_ci } else if (mddev->pers && ppl_conf->mismatch_count > 0) { 144262306a36Sopenharmony_ci /* no mismatch allowed when enabling PPL for a running array */ 144362306a36Sopenharmony_ci ret = -EINVAL; 144462306a36Sopenharmony_ci goto err; 144562306a36Sopenharmony_ci } 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci conf->log_private = ppl_conf; 144862306a36Sopenharmony_ci set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci return 0; 145162306a36Sopenharmony_cierr: 145262306a36Sopenharmony_ci __ppl_exit_log(ppl_conf); 145362306a36Sopenharmony_ci return ret; 145462306a36Sopenharmony_ci} 145562306a36Sopenharmony_ci 145662306a36Sopenharmony_ciint ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add) 145762306a36Sopenharmony_ci{ 145862306a36Sopenharmony_ci struct ppl_conf *ppl_conf = conf->log_private; 145962306a36Sopenharmony_ci struct ppl_log *log; 146062306a36Sopenharmony_ci int ret = 0; 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci if (!rdev) 146362306a36Sopenharmony_ci return -EINVAL; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci pr_debug("%s: disk: %d operation: %s dev: %pg\n", 146662306a36Sopenharmony_ci __func__, rdev->raid_disk, add ? 
"add" : "remove", 146762306a36Sopenharmony_ci rdev->bdev); 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_ci if (rdev->raid_disk < 0) 147062306a36Sopenharmony_ci return 0; 147162306a36Sopenharmony_ci 147262306a36Sopenharmony_ci if (rdev->raid_disk >= ppl_conf->count) 147362306a36Sopenharmony_ci return -ENODEV; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci log = &ppl_conf->child_logs[rdev->raid_disk]; 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci mutex_lock(&log->io_mutex); 147862306a36Sopenharmony_ci if (add) { 147962306a36Sopenharmony_ci ret = ppl_validate_rdev(rdev); 148062306a36Sopenharmony_ci if (!ret) { 148162306a36Sopenharmony_ci log->rdev = rdev; 148262306a36Sopenharmony_ci ret = ppl_write_empty_header(log); 148362306a36Sopenharmony_ci ppl_init_child_log(log, rdev); 148462306a36Sopenharmony_ci } 148562306a36Sopenharmony_ci } else { 148662306a36Sopenharmony_ci log->rdev = NULL; 148762306a36Sopenharmony_ci } 148862306a36Sopenharmony_ci mutex_unlock(&log->io_mutex); 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci return ret; 149162306a36Sopenharmony_ci} 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_cistatic ssize_t 149462306a36Sopenharmony_cippl_write_hint_show(struct mddev *mddev, char *buf) 149562306a36Sopenharmony_ci{ 149662306a36Sopenharmony_ci return sprintf(buf, "%d\n", 0); 149762306a36Sopenharmony_ci} 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_cistatic ssize_t 150062306a36Sopenharmony_cippl_write_hint_store(struct mddev *mddev, const char *page, size_t len) 150162306a36Sopenharmony_ci{ 150262306a36Sopenharmony_ci struct r5conf *conf; 150362306a36Sopenharmony_ci int err = 0; 150462306a36Sopenharmony_ci unsigned short new; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci if (len >= PAGE_SIZE) 150762306a36Sopenharmony_ci return -EINVAL; 150862306a36Sopenharmony_ci if (kstrtou16(page, 10, &new)) 150962306a36Sopenharmony_ci return -EINVAL; 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci err = 
mddev_lock(mddev); 151262306a36Sopenharmony_ci if (err) 151362306a36Sopenharmony_ci return err; 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci conf = mddev->private; 151662306a36Sopenharmony_ci if (!conf) 151762306a36Sopenharmony_ci err = -ENODEV; 151862306a36Sopenharmony_ci else if (!raid5_has_ppl(conf) || !conf->log_private) 151962306a36Sopenharmony_ci err = -EINVAL; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci mddev_unlock(mddev); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci return err ?: len; 152462306a36Sopenharmony_ci} 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_cistruct md_sysfs_entry 152762306a36Sopenharmony_cippl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, 152862306a36Sopenharmony_ci ppl_write_hint_show, 152962306a36Sopenharmony_ci ppl_write_hint_store); 1530