18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci#ifndef BITMAP_H 88c2ecf20Sopenharmony_ci#define BITMAP_H 1 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#define BITMAP_MAJOR_LO 3 118c2ecf20Sopenharmony_ci/* version 4 insists the bitmap is in little-endian order 128c2ecf20Sopenharmony_ci * with version 3, it is host-endian which is non-portable 138c2ecf20Sopenharmony_ci * Version 5 is currently set only for clustered devices 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci#define BITMAP_MAJOR_HI 4 168c2ecf20Sopenharmony_ci#define BITMAP_MAJOR_CLUSTERED 5 178c2ecf20Sopenharmony_ci#define BITMAP_MAJOR_HOSTENDIAN 3 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci/* 208c2ecf20Sopenharmony_ci * in-memory bitmap: 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * Use 16 bit block counters to track pending writes to each "chunk". 238c2ecf20Sopenharmony_ci * The 2 high order bits are special-purpose, the first is a flag indicating 248c2ecf20Sopenharmony_ci * whether a resync is needed. The second is a flag indicating whether a 258c2ecf20Sopenharmony_ci * resync is active. 268c2ecf20Sopenharmony_ci * This means that the counter is actually 14 bits: 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * +--------+--------+------------------------------------------------+ 298c2ecf20Sopenharmony_ci * | resync | resync | counter | 308c2ecf20Sopenharmony_ci * | needed | active | | 318c2ecf20Sopenharmony_ci * | (0-1) | (0-1) | (0-16383) | 328c2ecf20Sopenharmony_ci * +--------+--------+------------------------------------------------+ 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * The "resync needed" bit is set when: 358c2ecf20Sopenharmony_ci * a '1' bit is read from storage at startup. 368c2ecf20Sopenharmony_ci * a write request fails on some drives 378c2ecf20Sopenharmony_ci * a resync is aborted on a chunk with 'resync active' set 388c2ecf20Sopenharmony_ci * It is cleared (and resync-active set) when a resync starts across all drives 398c2ecf20Sopenharmony_ci * of the chunk. 408c2ecf20Sopenharmony_ci * 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * The "resync active" bit is set when: 438c2ecf20Sopenharmony_ci * a resync is started on all drives, and resync_needed is set. 448c2ecf20Sopenharmony_ci * resync_needed will be cleared (as long as resync_active wasn't already set). 458c2ecf20Sopenharmony_ci * It is cleared when a resync completes. 468c2ecf20Sopenharmony_ci * 478c2ecf20Sopenharmony_ci * The counter counts pending write requests, plus the on-disk bit. 488c2ecf20Sopenharmony_ci * When the counter is '1' and the resync bits are clear, the on-disk 498c2ecf20Sopenharmony_ci * bit can be cleared as well, thus setting the counter to 0. 508c2ecf20Sopenharmony_ci * When we set a bit, or in the counter (to start a write), if the fields is 518c2ecf20Sopenharmony_ci * 0, we first set the disk bit and set the counter to 1. 528c2ecf20Sopenharmony_ci * 538c2ecf20Sopenharmony_ci * If the counter is 0, the on-disk bit is clear and the stripe is clean 548c2ecf20Sopenharmony_ci * Anything that dirties the stripe pushes the counter to 2 (at least) 558c2ecf20Sopenharmony_ci * and sets the on-disk bit (lazily). 568c2ecf20Sopenharmony_ci * If a periodic sweep find the counter at 2, it is decremented to 1. 578c2ecf20Sopenharmony_ci * If the sweep find the counter at 1, the on-disk bit is cleared and the 588c2ecf20Sopenharmony_ci * counter goes to zero. 598c2ecf20Sopenharmony_ci * 608c2ecf20Sopenharmony_ci * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 618c2ecf20Sopenharmony_ci * counters as a fallback when "page" memory cannot be allocated: 628c2ecf20Sopenharmony_ci * 638c2ecf20Sopenharmony_ci * Normal case (page memory allocated): 648c2ecf20Sopenharmony_ci * 658c2ecf20Sopenharmony_ci * page pointer (32-bit) 668c2ecf20Sopenharmony_ci * 678c2ecf20Sopenharmony_ci * [ ] ------+ 688c2ecf20Sopenharmony_ci * | 698c2ecf20Sopenharmony_ci * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 708c2ecf20Sopenharmony_ci * c1 c2 c2048 718c2ecf20Sopenharmony_ci * 728c2ecf20Sopenharmony_ci * Hijacked case (page memory allocation failed): 738c2ecf20Sopenharmony_ci * 748c2ecf20Sopenharmony_ci * hijacked page pointer (32-bit) 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * [ ][ ] (no page memory allocated) 778c2ecf20Sopenharmony_ci * counter #1 (16-bit) counter #2 (16-bit) 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci */ 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci#ifdef __KERNEL__ 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci#define PAGE_BITS (PAGE_SIZE << 3) 848c2ecf20Sopenharmony_ci#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_citypedef __u16 bitmap_counter_t; 878c2ecf20Sopenharmony_ci#define COUNTER_BITS 16 888c2ecf20Sopenharmony_ci#define COUNTER_BIT_SHIFT 4 898c2ecf20Sopenharmony_ci#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 928c2ecf20Sopenharmony_ci#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) 938c2ecf20Sopenharmony_ci#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) 948c2ecf20Sopenharmony_ci#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 958c2ecf20Sopenharmony_ci#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 968c2ecf20Sopenharmony_ci#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci/* how many counters per page? */ 998c2ecf20Sopenharmony_ci#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 1008c2ecf20Sopenharmony_ci/* same, except a shift value for more efficient bitops */ 1018c2ecf20Sopenharmony_ci#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 1028c2ecf20Sopenharmony_ci/* same, except a mask value for more efficient bitops */ 1038c2ecf20Sopenharmony_ci#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci#define BITMAP_BLOCK_SHIFT 9 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci#endif 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci/* 1108c2ecf20Sopenharmony_ci * bitmap structures: 1118c2ecf20Sopenharmony_ci */ 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci#define BITMAP_MAGIC 0x6d746962 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci/* use these for bitmap->flags and bitmap->sb->state bit-fields */ 1168c2ecf20Sopenharmony_cienum bitmap_state { 1178c2ecf20Sopenharmony_ci BITMAP_STALE = 1, /* the bitmap file is out of date or had -EIO */ 1188c2ecf20Sopenharmony_ci BITMAP_WRITE_ERROR = 2, /* A write error has occurred */ 1198c2ecf20Sopenharmony_ci BITMAP_HOSTENDIAN =15, 1208c2ecf20Sopenharmony_ci}; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci/* the superblock at the front of the bitmap file -- little endian */ 1238c2ecf20Sopenharmony_citypedef struct bitmap_super_s { 1248c2ecf20Sopenharmony_ci __le32 magic; /* 0 BITMAP_MAGIC */ 1258c2ecf20Sopenharmony_ci __le32 version; /* 4 the bitmap major for now, could change... */ 1268c2ecf20Sopenharmony_ci __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ 1278c2ecf20Sopenharmony_ci __le64 events; /* 24 event counter for the bitmap (1)*/ 1288c2ecf20Sopenharmony_ci __le64 events_cleared;/*32 event counter when last bit cleared (2) */ 1298c2ecf20Sopenharmony_ci __le64 sync_size; /* 40 the size of the md device's sync range(3) */ 1308c2ecf20Sopenharmony_ci __le32 state; /* 48 bitmap state information */ 1318c2ecf20Sopenharmony_ci __le32 chunksize; /* 52 the bitmap chunk size in bytes */ 1328c2ecf20Sopenharmony_ci __le32 daemon_sleep; /* 56 seconds between disk flushes */ 1338c2ecf20Sopenharmony_ci __le32 write_behind; /* 60 number of outstanding write-behind writes */ 1348c2ecf20Sopenharmony_ci __le32 sectors_reserved; /* 64 number of 512-byte sectors that are 1358c2ecf20Sopenharmony_ci * reserved for the bitmap. */ 1368c2ecf20Sopenharmony_ci __le32 nodes; /* 68 the maximum number of nodes in cluster. */ 1378c2ecf20Sopenharmony_ci __u8 cluster_name[64]; /* 72 cluster name to which this md belongs */ 1388c2ecf20Sopenharmony_ci __u8 pad[256 - 136]; /* set to zero */ 1398c2ecf20Sopenharmony_ci} bitmap_super_t; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci/* notes: 1428c2ecf20Sopenharmony_ci * (1) This event counter is updated before the eventcounter in the md superblock 1438c2ecf20Sopenharmony_ci * When a bitmap is loaded, it is only accepted if this event counter is equal 1448c2ecf20Sopenharmony_ci * to, or one greater than, the event counter in the superblock. 1458c2ecf20Sopenharmony_ci * (2) This event counter is updated when the other one is *if*and*only*if* the 1468c2ecf20Sopenharmony_ci * array is not degraded. As bits are not cleared when the array is degraded, 1478c2ecf20Sopenharmony_ci * this represents the last time that any bits were cleared. 1488c2ecf20Sopenharmony_ci * If a device is being added that has an event count with this value or 1498c2ecf20Sopenharmony_ci * higher, it is accepted as conforming to the bitmap. 1508c2ecf20Sopenharmony_ci * (3)This is the number of sectors represented by the bitmap, and is the range that 1518c2ecf20Sopenharmony_ci * resync happens across. For raid1 and raid5/6 it is the size of individual 1528c2ecf20Sopenharmony_ci * devices. For raid10 it is the size of the array. 1538c2ecf20Sopenharmony_ci */ 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci#ifdef __KERNEL__ 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci/* the in-memory bitmap is represented by bitmap_pages */ 1588c2ecf20Sopenharmony_cistruct bitmap_page { 1598c2ecf20Sopenharmony_ci /* 1608c2ecf20Sopenharmony_ci * map points to the actual memory page 1618c2ecf20Sopenharmony_ci */ 1628c2ecf20Sopenharmony_ci char *map; 1638c2ecf20Sopenharmony_ci /* 1648c2ecf20Sopenharmony_ci * in emergencies (when map cannot be alloced), hijack the map 1658c2ecf20Sopenharmony_ci * pointer and use it as two counters itself 1668c2ecf20Sopenharmony_ci */ 1678c2ecf20Sopenharmony_ci unsigned int hijacked:1; 1688c2ecf20Sopenharmony_ci /* 1698c2ecf20Sopenharmony_ci * If any counter in this page is '1' or '2' - and so could be 1708c2ecf20Sopenharmony_ci * cleared then that page is marked as 'pending' 1718c2ecf20Sopenharmony_ci */ 1728c2ecf20Sopenharmony_ci unsigned int pending:1; 1738c2ecf20Sopenharmony_ci /* 1748c2ecf20Sopenharmony_ci * count of dirty bits on the page 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_ci unsigned int count:30; 1778c2ecf20Sopenharmony_ci}; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci/* the main bitmap structure - one per mddev */ 1808c2ecf20Sopenharmony_cistruct bitmap { 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci struct bitmap_counts { 1838c2ecf20Sopenharmony_ci spinlock_t lock; 1848c2ecf20Sopenharmony_ci struct bitmap_page *bp; 1858c2ecf20Sopenharmony_ci unsigned long pages; /* total number of pages 1868c2ecf20Sopenharmony_ci * in the bitmap */ 1878c2ecf20Sopenharmony_ci unsigned long missing_pages; /* number of pages 1888c2ecf20Sopenharmony_ci * not yet allocated */ 1898c2ecf20Sopenharmony_ci unsigned long chunkshift; /* chunksize = 2^chunkshift 1908c2ecf20Sopenharmony_ci * (for bitops) */ 1918c2ecf20Sopenharmony_ci unsigned long chunks; /* Total number of data 1928c2ecf20Sopenharmony_ci * chunks for the array */ 1938c2ecf20Sopenharmony_ci } counts; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci struct mddev *mddev; /* the md device that the bitmap is for */ 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci __u64 events_cleared; 1988c2ecf20Sopenharmony_ci int need_sync; 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci struct bitmap_storage { 2018c2ecf20Sopenharmony_ci struct file *file; /* backing disk file */ 2028c2ecf20Sopenharmony_ci struct page *sb_page; /* cached copy of the bitmap 2038c2ecf20Sopenharmony_ci * file superblock */ 2048c2ecf20Sopenharmony_ci struct page **filemap; /* list of cache pages for 2058c2ecf20Sopenharmony_ci * the file */ 2068c2ecf20Sopenharmony_ci unsigned long *filemap_attr; /* attributes associated 2078c2ecf20Sopenharmony_ci * w/ filemap pages */ 2088c2ecf20Sopenharmony_ci unsigned long file_pages; /* number of pages in the file*/ 2098c2ecf20Sopenharmony_ci unsigned long bytes; /* total bytes in the bitmap */ 2108c2ecf20Sopenharmony_ci } storage; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci unsigned long flags; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci int allclean; 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci atomic_t behind_writes; 2178c2ecf20Sopenharmony_ci unsigned long behind_writes_used; /* highest actual value at runtime */ 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci /* 2208c2ecf20Sopenharmony_ci * the bitmap daemon - periodically wakes up and sweeps the bitmap 2218c2ecf20Sopenharmony_ci * file, cleaning up bits and flushing out pages to disk as necessary 2228c2ecf20Sopenharmony_ci */ 2238c2ecf20Sopenharmony_ci unsigned long daemon_lastrun; /* jiffies of last run */ 2248c2ecf20Sopenharmony_ci unsigned long last_end_sync; /* when we lasted called end_sync to 2258c2ecf20Sopenharmony_ci * update bitmap with resync progress */ 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci atomic_t pending_writes; /* pending writes to the bitmap file */ 2288c2ecf20Sopenharmony_ci wait_queue_head_t write_wait; 2298c2ecf20Sopenharmony_ci wait_queue_head_t overflow_wait; 2308c2ecf20Sopenharmony_ci wait_queue_head_t behind_wait; 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci struct kernfs_node *sysfs_can_clear; 2338c2ecf20Sopenharmony_ci int cluster_slot; /* Slot offset for clustered env */ 2348c2ecf20Sopenharmony_ci}; 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci/* the bitmap API */ 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci/* these are used only by md/bitmap */ 2398c2ecf20Sopenharmony_cistruct bitmap *md_bitmap_create(struct mddev *mddev, int slot); 2408c2ecf20Sopenharmony_ciint md_bitmap_load(struct mddev *mddev); 2418c2ecf20Sopenharmony_civoid md_bitmap_flush(struct mddev *mddev); 2428c2ecf20Sopenharmony_civoid md_bitmap_destroy(struct mddev *mddev); 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_civoid md_bitmap_print_sb(struct bitmap *bitmap); 2458c2ecf20Sopenharmony_civoid md_bitmap_update_sb(struct bitmap *bitmap); 2468c2ecf20Sopenharmony_civoid md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap); 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ciint md_bitmap_setallbits(struct bitmap *bitmap); 2498c2ecf20Sopenharmony_civoid md_bitmap_write_all(struct bitmap *bitmap); 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_civoid md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci/* these are exported */ 2548c2ecf20Sopenharmony_ciint md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, 2558c2ecf20Sopenharmony_ci unsigned long sectors, int behind); 2568c2ecf20Sopenharmony_civoid md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, 2578c2ecf20Sopenharmony_ci unsigned long sectors, int success, int behind); 2588c2ecf20Sopenharmony_ciint md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); 2598c2ecf20Sopenharmony_civoid md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); 2608c2ecf20Sopenharmony_civoid md_bitmap_close_sync(struct bitmap *bitmap); 2618c2ecf20Sopenharmony_civoid md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force); 2628c2ecf20Sopenharmony_civoid md_bitmap_sync_with_cluster(struct mddev *mddev, 2638c2ecf20Sopenharmony_ci sector_t old_lo, sector_t old_hi, 2648c2ecf20Sopenharmony_ci sector_t new_lo, sector_t new_hi); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_civoid md_bitmap_unplug(struct bitmap *bitmap); 2678c2ecf20Sopenharmony_civoid md_bitmap_daemon_work(struct mddev *mddev); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ciint md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2708c2ecf20Sopenharmony_ci int chunksize, int init); 2718c2ecf20Sopenharmony_cistruct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); 2728c2ecf20Sopenharmony_ciint md_bitmap_copy_from_slot(struct mddev *mddev, int slot, 2738c2ecf20Sopenharmony_ci sector_t *lo, sector_t *hi, bool clear_bits); 2748c2ecf20Sopenharmony_civoid md_bitmap_free(struct bitmap *bitmap); 2758c2ecf20Sopenharmony_civoid md_bitmap_wait_behind_writes(struct mddev *mddev); 2768c2ecf20Sopenharmony_ci#endif 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci#endif 279