162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci drbd_actlog.c 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. 862306a36Sopenharmony_ci Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. 962306a36Sopenharmony_ci Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/slab.h> 1562306a36Sopenharmony_ci#include <linux/crc32c.h> 1662306a36Sopenharmony_ci#include <linux/drbd.h> 1762306a36Sopenharmony_ci#include <linux/drbd_limits.h> 1862306a36Sopenharmony_ci#include "drbd_int.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cienum al_transaction_types { 2262306a36Sopenharmony_ci AL_TR_UPDATE = 0, 2362306a36Sopenharmony_ci AL_TR_INITIALIZED = 0xffff 2462306a36Sopenharmony_ci}; 2562306a36Sopenharmony_ci/* all fields on disc in big endian */ 2662306a36Sopenharmony_cistruct __packed al_transaction_on_disk { 2762306a36Sopenharmony_ci /* don't we all like magic */ 2862306a36Sopenharmony_ci __be32 magic; 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci /* to identify the most recent transaction block 3162306a36Sopenharmony_ci * in the on disk ring buffer */ 3262306a36Sopenharmony_ci __be32 tr_number; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci /* checksum on the full 4k block, with this field set to 0. */ 3562306a36Sopenharmony_ci __be32 crc32c; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci /* type of transaction, special transaction types like: 3862306a36Sopenharmony_ci * purge-all, set-all-idle, set-all-active, ... to-be-defined 3962306a36Sopenharmony_ci * see also enum al_transaction_types */ 4062306a36Sopenharmony_ci __be16 transaction_type; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci /* we currently allow only a few thousand extents, 4362306a36Sopenharmony_ci * so 16bit will be enough for the slot number. */ 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci /* how many updates in this transaction */ 4662306a36Sopenharmony_ci __be16 n_updates; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci /* maximum slot number, "al-extents" in drbd.conf speak. 4962306a36Sopenharmony_ci * Having this in each transaction should make reconfiguration 5062306a36Sopenharmony_ci * of that parameter easier. */ 5162306a36Sopenharmony_ci __be16 context_size; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci /* slot number the context starts with */ 5462306a36Sopenharmony_ci __be16 context_start_slot_nr; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci /* Some reserved bytes. Expected usage is a 64bit counter of 5762306a36Sopenharmony_ci * sectors-written since device creation, and other data generation tag 5862306a36Sopenharmony_ci * supporting usage */ 5962306a36Sopenharmony_ci __be32 __reserved[4]; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci /* --- 36 byte used --- */ 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes 6462306a36Sopenharmony_ci * in one transaction, then use the remaining byte in the 4k block for 6562306a36Sopenharmony_ci * context information. "Flexible" number of updates per transaction 6662306a36Sopenharmony_ci * does not help, as we have to account for the case when all update 6762306a36Sopenharmony_ci * slots are used anyways, so it would only complicate code without 6862306a36Sopenharmony_ci * additional benefit. 6962306a36Sopenharmony_ci */ 7062306a36Sopenharmony_ci __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci /* but the extent number is 32bit, which at an extent size of 4 MiB 7362306a36Sopenharmony_ci * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ 7462306a36Sopenharmony_ci __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci /* --- 420 bytes used (36 + 64*6) --- */ 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci /* 4096 - 420 = 3676 = 919 * 4 */ 7962306a36Sopenharmony_ci __be32 context[AL_CONTEXT_PER_TRANSACTION]; 8062306a36Sopenharmony_ci}; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_civoid *drbd_md_get_buffer(struct drbd_device *device, const char *intent) 8362306a36Sopenharmony_ci{ 8462306a36Sopenharmony_ci int r; 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci wait_event(device->misc_wait, 8762306a36Sopenharmony_ci (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 || 8862306a36Sopenharmony_ci device->state.disk <= D_FAILED); 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci if (r) 9162306a36Sopenharmony_ci return NULL; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci device->md_io.current_use = intent; 9462306a36Sopenharmony_ci device->md_io.start_jif = jiffies; 9562306a36Sopenharmony_ci device->md_io.submit_jif = device->md_io.start_jif - 1; 9662306a36Sopenharmony_ci return page_address(device->md_io.page); 9762306a36Sopenharmony_ci} 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_civoid drbd_md_put_buffer(struct drbd_device *device) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci if (atomic_dec_and_test(&device->md_io.in_use)) 10262306a36Sopenharmony_ci wake_up(&device->misc_wait); 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_civoid wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, 10662306a36Sopenharmony_ci unsigned int *done) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci long dt; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci rcu_read_lock(); 11162306a36Sopenharmony_ci dt = rcu_dereference(bdev->disk_conf)->disk_timeout; 11262306a36Sopenharmony_ci rcu_read_unlock(); 11362306a36Sopenharmony_ci dt = dt * HZ / 10; 11462306a36Sopenharmony_ci if (dt == 0) 11562306a36Sopenharmony_ci dt = MAX_SCHEDULE_TIMEOUT; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci dt = wait_event_timeout(device->misc_wait, 11862306a36Sopenharmony_ci *done || test_bit(FORCE_DETACH, &device->flags), dt); 11962306a36Sopenharmony_ci if (dt == 0) { 12062306a36Sopenharmony_ci drbd_err(device, "meta-data IO operation timed out\n"); 12162306a36Sopenharmony_ci drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH); 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cistatic int _drbd_md_sync_page_io(struct drbd_device *device, 12662306a36Sopenharmony_ci struct drbd_backing_dev *bdev, 12762306a36Sopenharmony_ci sector_t sector, enum req_op op) 12862306a36Sopenharmony_ci{ 12962306a36Sopenharmony_ci struct bio *bio; 13062306a36Sopenharmony_ci /* we do all our meta data IO in aligned 4k blocks. */ 13162306a36Sopenharmony_ci const int size = 4096; 13262306a36Sopenharmony_ci int err; 13362306a36Sopenharmony_ci blk_opf_t op_flags = 0; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci device->md_io.done = 0; 13662306a36Sopenharmony_ci device->md_io.error = -ENODEV; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) 13962306a36Sopenharmony_ci op_flags |= REQ_FUA | REQ_PREFLUSH; 14062306a36Sopenharmony_ci op_flags |= REQ_SYNC; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO, 14362306a36Sopenharmony_ci &drbd_md_io_bio_set); 14462306a36Sopenharmony_ci bio->bi_iter.bi_sector = sector; 14562306a36Sopenharmony_ci err = -EIO; 14662306a36Sopenharmony_ci if (bio_add_page(bio, device->md_io.page, size, 0) != size) 14762306a36Sopenharmony_ci goto out; 14862306a36Sopenharmony_ci bio->bi_private = device; 14962306a36Sopenharmony_ci bio->bi_end_io = drbd_md_endio; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL) 15262306a36Sopenharmony_ci /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ 15362306a36Sopenharmony_ci ; 15462306a36Sopenharmony_ci else if (!get_ldev_if_state(device, D_ATTACHING)) { 15562306a36Sopenharmony_ci /* Corresponding put_ldev in drbd_md_endio() */ 15662306a36Sopenharmony_ci drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); 15762306a36Sopenharmony_ci err = -ENODEV; 15862306a36Sopenharmony_ci goto out; 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci bio_get(bio); /* one bio_put() is in the completion handler */ 16262306a36Sopenharmony_ci atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ 16362306a36Sopenharmony_ci device->md_io.submit_jif = jiffies; 16462306a36Sopenharmony_ci if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) 16562306a36Sopenharmony_ci bio_io_error(bio); 16662306a36Sopenharmony_ci else 16762306a36Sopenharmony_ci submit_bio(bio); 16862306a36Sopenharmony_ci wait_until_done_or_force_detached(device, bdev, &device->md_io.done); 16962306a36Sopenharmony_ci if (!bio->bi_status) 17062306a36Sopenharmony_ci err = device->md_io.error; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci out: 17362306a36Sopenharmony_ci bio_put(bio); 17462306a36Sopenharmony_ci return err; 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ciint drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, 17862306a36Sopenharmony_ci sector_t sector, enum req_op op) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci int err; 18162306a36Sopenharmony_ci D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci BUG_ON(!bdev->md_bdev); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", 18662306a36Sopenharmony_ci current->comm, current->pid, __func__, 18762306a36Sopenharmony_ci (unsigned long long)sector, (op == REQ_OP_WRITE) ? "WRITE" : "READ", 18862306a36Sopenharmony_ci (void*)_RET_IP_ ); 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci if (sector < drbd_md_first_sector(bdev) || 19162306a36Sopenharmony_ci sector + 7 > drbd_md_last_sector(bdev)) 19262306a36Sopenharmony_ci drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n", 19362306a36Sopenharmony_ci current->comm, current->pid, __func__, 19462306a36Sopenharmony_ci (unsigned long long)sector, 19562306a36Sopenharmony_ci (op == REQ_OP_WRITE) ? "WRITE" : "READ"); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci err = _drbd_md_sync_page_io(device, bdev, sector, op); 19862306a36Sopenharmony_ci if (err) { 19962306a36Sopenharmony_ci drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", 20062306a36Sopenharmony_ci (unsigned long long)sector, 20162306a36Sopenharmony_ci (op == REQ_OP_WRITE) ? "WRITE" : "READ", err); 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci return err; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci struct lc_element *tmp; 20962306a36Sopenharmony_ci tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT); 21062306a36Sopenharmony_ci if (unlikely(tmp != NULL)) { 21162306a36Sopenharmony_ci struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 21262306a36Sopenharmony_ci if (test_bit(BME_NO_WRITES, &bm_ext->flags)) 21362306a36Sopenharmony_ci return bm_ext; 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci return NULL; 21662306a36Sopenharmony_ci} 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_cistatic struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci struct lc_element *al_ext; 22162306a36Sopenharmony_ci struct bm_extent *bm_ext; 22262306a36Sopenharmony_ci int wake; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 22562306a36Sopenharmony_ci bm_ext = find_active_resync_extent(device, enr); 22662306a36Sopenharmony_ci if (bm_ext) { 22762306a36Sopenharmony_ci wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); 22862306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 22962306a36Sopenharmony_ci if (wake) 23062306a36Sopenharmony_ci wake_up(&device->al_wait); 23162306a36Sopenharmony_ci return NULL; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci if (nonblock) 23462306a36Sopenharmony_ci al_ext = lc_try_get(device->act_log, enr); 23562306a36Sopenharmony_ci else 23662306a36Sopenharmony_ci al_ext = lc_get(device->act_log, enr); 23762306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 23862306a36Sopenharmony_ci return al_ext; 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cibool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci /* for bios crossing activity log extent boundaries, 24462306a36Sopenharmony_ci * we may need to activate two extents in one go */ 24562306a36Sopenharmony_ci unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); 24662306a36Sopenharmony_ci unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci D_ASSERT(device, first <= last); 24962306a36Sopenharmony_ci D_ASSERT(device, atomic_read(&device->local_cnt) > 0); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci /* FIXME figure out a fast path for bios crossing AL extent boundaries */ 25262306a36Sopenharmony_ci if (first != last) 25362306a36Sopenharmony_ci return false; 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci return _al_get(device, first, true); 25662306a36Sopenharmony_ci} 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_cibool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci /* for bios crossing activity log extent boundaries, 26162306a36Sopenharmony_ci * we may need to activate two extents in one go */ 26262306a36Sopenharmony_ci unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); 26362306a36Sopenharmony_ci unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); 26462306a36Sopenharmony_ci unsigned enr; 26562306a36Sopenharmony_ci bool need_transaction = false; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci D_ASSERT(device, first <= last); 26862306a36Sopenharmony_ci D_ASSERT(device, atomic_read(&device->local_cnt) > 0); 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci for (enr = first; enr <= last; enr++) { 27162306a36Sopenharmony_ci struct lc_element *al_ext; 27262306a36Sopenharmony_ci wait_event(device->al_wait, 27362306a36Sopenharmony_ci (al_ext = _al_get(device, enr, false)) != NULL); 27462306a36Sopenharmony_ci if (al_ext->lc_number != enr) 27562306a36Sopenharmony_ci need_transaction = true; 27662306a36Sopenharmony_ci } 27762306a36Sopenharmony_ci return need_transaction; 27862306a36Sopenharmony_ci} 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) 28162306a36Sopenharmony_ci/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT 28262306a36Sopenharmony_ci * are still coupled, or assume too much about their relation. 28362306a36Sopenharmony_ci * Code below will not work if this is violated. 28462306a36Sopenharmony_ci * Will be cleaned up with some followup patch. 28562306a36Sopenharmony_ci */ 28662306a36Sopenharmony_ci# error FIXME 28762306a36Sopenharmony_ci#endif 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_cistatic unsigned int al_extent_to_bm_page(unsigned int al_enr) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci return al_enr >> 29262306a36Sopenharmony_ci /* bit to page */ 29362306a36Sopenharmony_ci ((PAGE_SHIFT + 3) - 29462306a36Sopenharmony_ci /* al extent number to bit */ 29562306a36Sopenharmony_ci (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); 29662306a36Sopenharmony_ci} 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_cistatic sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) 29962306a36Sopenharmony_ci{ 30062306a36Sopenharmony_ci const unsigned int stripes = device->ldev->md.al_stripes; 30162306a36Sopenharmony_ci const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci /* transaction number, modulo on-disk ring buffer wrap around */ 30462306a36Sopenharmony_ci unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci /* ... to aligned 4k on disk block */ 30762306a36Sopenharmony_ci t = ((t % stripes) * stripe_size_4kB) + t/stripes; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci /* ... to 512 byte sector in activity log */ 31062306a36Sopenharmony_ci t *= 8; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci /* ... plus offset to the on disk position */ 31362306a36Sopenharmony_ci return device->ldev->md.md_offset + device->ldev->md.al_offset + t; 31462306a36Sopenharmony_ci} 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_cistatic int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer) 31762306a36Sopenharmony_ci{ 31862306a36Sopenharmony_ci struct lc_element *e; 31962306a36Sopenharmony_ci sector_t sector; 32062306a36Sopenharmony_ci int i, mx; 32162306a36Sopenharmony_ci unsigned extent_nr; 32262306a36Sopenharmony_ci unsigned crc = 0; 32362306a36Sopenharmony_ci int err = 0; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci memset(buffer, 0, sizeof(*buffer)); 32662306a36Sopenharmony_ci buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); 32762306a36Sopenharmony_ci buffer->tr_number = cpu_to_be32(device->al_tr_number); 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci i = 0; 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci drbd_bm_reset_al_hints(device); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci /* Even though no one can start to change this list 33462306a36Sopenharmony_ci * once we set the LC_LOCKED -- from drbd_al_begin_io(), 33562306a36Sopenharmony_ci * lc_try_lock_for_transaction() --, someone may still 33662306a36Sopenharmony_ci * be in the process of changing it. */ 33762306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 33862306a36Sopenharmony_ci list_for_each_entry(e, &device->act_log->to_be_changed, list) { 33962306a36Sopenharmony_ci if (i == AL_UPDATES_PER_TRANSACTION) { 34062306a36Sopenharmony_ci i++; 34162306a36Sopenharmony_ci break; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); 34462306a36Sopenharmony_ci buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); 34562306a36Sopenharmony_ci if (e->lc_number != LC_FREE) 34662306a36Sopenharmony_ci drbd_bm_mark_for_writeout(device, 34762306a36Sopenharmony_ci al_extent_to_bm_page(e->lc_number)); 34862306a36Sopenharmony_ci i++; 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 35162306a36Sopenharmony_ci BUG_ON(i > AL_UPDATES_PER_TRANSACTION); 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci buffer->n_updates = cpu_to_be16(i); 35462306a36Sopenharmony_ci for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { 35562306a36Sopenharmony_ci buffer->update_slot_nr[i] = cpu_to_be16(-1); 35662306a36Sopenharmony_ci buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci buffer->context_size = cpu_to_be16(device->act_log->nr_elements); 36062306a36Sopenharmony_ci buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, 36362306a36Sopenharmony_ci device->act_log->nr_elements - device->al_tr_cycle); 36462306a36Sopenharmony_ci for (i = 0; i < mx; i++) { 36562306a36Sopenharmony_ci unsigned idx = device->al_tr_cycle + i; 36662306a36Sopenharmony_ci extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; 36762306a36Sopenharmony_ci buffer->context[i] = cpu_to_be32(extent_nr); 36862306a36Sopenharmony_ci } 36962306a36Sopenharmony_ci for (; i < AL_CONTEXT_PER_TRANSACTION; i++) 37062306a36Sopenharmony_ci buffer->context[i] = cpu_to_be32(LC_FREE); 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; 37362306a36Sopenharmony_ci if (device->al_tr_cycle >= device->act_log->nr_elements) 37462306a36Sopenharmony_ci device->al_tr_cycle = 0; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci sector = al_tr_number_to_on_disk_sector(device); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci crc = crc32c(0, buffer, 4096); 37962306a36Sopenharmony_ci buffer->crc32c = cpu_to_be32(crc); 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci if (drbd_bm_write_hinted(device)) 38262306a36Sopenharmony_ci err = -EIO; 38362306a36Sopenharmony_ci else { 38462306a36Sopenharmony_ci bool write_al_updates; 38562306a36Sopenharmony_ci rcu_read_lock(); 38662306a36Sopenharmony_ci write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; 38762306a36Sopenharmony_ci rcu_read_unlock(); 38862306a36Sopenharmony_ci if (write_al_updates) { 38962306a36Sopenharmony_ci if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) { 39062306a36Sopenharmony_ci err = -EIO; 39162306a36Sopenharmony_ci drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); 39262306a36Sopenharmony_ci } else { 39362306a36Sopenharmony_ci device->al_tr_number++; 39462306a36Sopenharmony_ci device->al_writ_cnt++; 39562306a36Sopenharmony_ci } 39662306a36Sopenharmony_ci } 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci return err; 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_cistatic int al_write_transaction(struct drbd_device *device) 40362306a36Sopenharmony_ci{ 40462306a36Sopenharmony_ci struct al_transaction_on_disk *buffer; 40562306a36Sopenharmony_ci int err; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci if (!get_ldev(device)) { 40862306a36Sopenharmony_ci drbd_err(device, "disk is %s, cannot start al transaction\n", 40962306a36Sopenharmony_ci drbd_disk_str(device->state.disk)); 41062306a36Sopenharmony_ci return -EIO; 41162306a36Sopenharmony_ci } 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci /* The bitmap write may have failed, causing a state change. */ 41462306a36Sopenharmony_ci if (device->state.disk < D_INCONSISTENT) { 41562306a36Sopenharmony_ci drbd_err(device, 41662306a36Sopenharmony_ci "disk is %s, cannot write al transaction\n", 41762306a36Sopenharmony_ci drbd_disk_str(device->state.disk)); 41862306a36Sopenharmony_ci put_ldev(device); 41962306a36Sopenharmony_ci return -EIO; 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci /* protects md_io_buffer, al_tr_cycle, ... */ 42362306a36Sopenharmony_ci buffer = drbd_md_get_buffer(device, __func__); 42462306a36Sopenharmony_ci if (!buffer) { 42562306a36Sopenharmony_ci drbd_err(device, "disk failed while waiting for md_io buffer\n"); 42662306a36Sopenharmony_ci put_ldev(device); 42762306a36Sopenharmony_ci return -ENODEV; 42862306a36Sopenharmony_ci } 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci err = __al_write_transaction(device, buffer); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci drbd_md_put_buffer(device); 43362306a36Sopenharmony_ci put_ldev(device); 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci return err; 43662306a36Sopenharmony_ci} 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_civoid drbd_al_begin_io_commit(struct drbd_device *device) 44062306a36Sopenharmony_ci{ 44162306a36Sopenharmony_ci bool locked = false; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci /* Serialize multiple transactions. 44462306a36Sopenharmony_ci * This uses test_and_set_bit, memory barrier is implicit. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci wait_event(device->al_wait, 44762306a36Sopenharmony_ci device->act_log->pending_changes == 0 || 44862306a36Sopenharmony_ci (locked = lc_try_lock_for_transaction(device->act_log))); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci if (locked) { 45162306a36Sopenharmony_ci /* Double check: it may have been committed by someone else, 45262306a36Sopenharmony_ci * while we have been waiting for the lock. */ 45362306a36Sopenharmony_ci if (device->act_log->pending_changes) { 45462306a36Sopenharmony_ci bool write_al_updates; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci rcu_read_lock(); 45762306a36Sopenharmony_ci write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; 45862306a36Sopenharmony_ci rcu_read_unlock(); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci if (write_al_updates) 46162306a36Sopenharmony_ci al_write_transaction(device); 46262306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 46362306a36Sopenharmony_ci /* FIXME 46462306a36Sopenharmony_ci if (err) 46562306a36Sopenharmony_ci we need an "lc_cancel" here; 46662306a36Sopenharmony_ci */ 46762306a36Sopenharmony_ci lc_committed(device->act_log); 46862306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 46962306a36Sopenharmony_ci } 47062306a36Sopenharmony_ci lc_unlock(device->act_log); 47162306a36Sopenharmony_ci wake_up(&device->al_wait); 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci} 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci/* 47662306a36Sopenharmony_ci * @delegate: delegate activity log I/O to the worker thread 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_civoid drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci if (drbd_al_begin_io_prepare(device, i)) 48162306a36Sopenharmony_ci drbd_al_begin_io_commit(device); 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ciint drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) 48562306a36Sopenharmony_ci{ 48662306a36Sopenharmony_ci struct lru_cache *al = device->act_log; 48762306a36Sopenharmony_ci /* for bios crossing activity log extent boundaries, 48862306a36Sopenharmony_ci * we may need to activate two extents in one go */ 48962306a36Sopenharmony_ci unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); 49062306a36Sopenharmony_ci unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); 49162306a36Sopenharmony_ci unsigned nr_al_extents; 49262306a36Sopenharmony_ci unsigned available_update_slots; 49362306a36Sopenharmony_ci unsigned enr; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci D_ASSERT(device, first <= last); 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */ 49862306a36Sopenharmony_ci available_update_slots = min(al->nr_elements - al->used, 49962306a36Sopenharmony_ci al->max_pending_changes - al->pending_changes); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci /* We want all necessary updates for a given request within the same transaction 50262306a36Sopenharmony_ci * We could first check how many updates are *actually* needed, 50362306a36Sopenharmony_ci * and use that instead of the worst-case nr_al_extents */ 50462306a36Sopenharmony_ci if (available_update_slots < nr_al_extents) { 50562306a36Sopenharmony_ci /* Too many activity log extents are currently "hot". 50662306a36Sopenharmony_ci * 50762306a36Sopenharmony_ci * If we have accumulated pending changes already, 50862306a36Sopenharmony_ci * we made progress. 50962306a36Sopenharmony_ci * 51062306a36Sopenharmony_ci * If we cannot get even a single pending change through, 51162306a36Sopenharmony_ci * stop the fast path until we made some progress, 51262306a36Sopenharmony_ci * or requests to "cold" extents could be starved. */ 51362306a36Sopenharmony_ci if (!al->pending_changes) 51462306a36Sopenharmony_ci __set_bit(__LC_STARVING, &device->act_log->flags); 51562306a36Sopenharmony_ci return -ENOBUFS; 51662306a36Sopenharmony_ci } 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci /* Is resync active in this area? */ 51962306a36Sopenharmony_ci for (enr = first; enr <= last; enr++) { 52062306a36Sopenharmony_ci struct lc_element *tmp; 52162306a36Sopenharmony_ci tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT); 52262306a36Sopenharmony_ci if (unlikely(tmp != NULL)) { 52362306a36Sopenharmony_ci struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 52462306a36Sopenharmony_ci if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { 52562306a36Sopenharmony_ci if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags)) 52662306a36Sopenharmony_ci return -EBUSY; 52762306a36Sopenharmony_ci return -EWOULDBLOCK; 52862306a36Sopenharmony_ci } 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci } 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci /* Checkout the refcounts. 53362306a36Sopenharmony_ci * Given that we checked for available elements and update slots above, 53462306a36Sopenharmony_ci * this has to be successful. */ 53562306a36Sopenharmony_ci for (enr = first; enr <= last; enr++) { 53662306a36Sopenharmony_ci struct lc_element *al_ext; 53762306a36Sopenharmony_ci al_ext = lc_get_cumulative(device->act_log, enr); 53862306a36Sopenharmony_ci if (!al_ext) 53962306a36Sopenharmony_ci drbd_info(device, "LOGIC BUG for enr=%u\n", enr); 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci return 0; 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_civoid drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) 54562306a36Sopenharmony_ci{ 54662306a36Sopenharmony_ci /* for bios crossing activity log extent boundaries, 54762306a36Sopenharmony_ci * we may need to activate two extents in one go */ 54862306a36Sopenharmony_ci unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); 54962306a36Sopenharmony_ci unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); 55062306a36Sopenharmony_ci unsigned enr; 55162306a36Sopenharmony_ci struct lc_element *extent; 55262306a36Sopenharmony_ci unsigned long flags; 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci D_ASSERT(device, first <= last); 55562306a36Sopenharmony_ci spin_lock_irqsave(&device->al_lock, flags); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci for (enr = first; enr <= last; enr++) { 55862306a36Sopenharmony_ci extent = lc_find(device->act_log, enr); 55962306a36Sopenharmony_ci if (!extent) { 56062306a36Sopenharmony_ci drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr); 56162306a36Sopenharmony_ci continue; 56262306a36Sopenharmony_ci } 56362306a36Sopenharmony_ci lc_put(device->act_log, extent); 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci spin_unlock_irqrestore(&device->al_lock, flags); 56662306a36Sopenharmony_ci wake_up(&device->al_wait); 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci int rv; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 57462306a36Sopenharmony_ci rv = (al_ext->refcnt == 0); 57562306a36Sopenharmony_ci if (likely(rv)) 57662306a36Sopenharmony_ci lc_del(device->act_log, al_ext); 57762306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci return rv; 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci/** 58362306a36Sopenharmony_ci * drbd_al_shrink() - Removes all active extents form the activity log 58462306a36Sopenharmony_ci * @device: DRBD device. 58562306a36Sopenharmony_ci * 58662306a36Sopenharmony_ci * Removes all active extents form the activity log, waiting until 58762306a36Sopenharmony_ci * the reference count of each entry dropped to 0 first, of course. 58862306a36Sopenharmony_ci * 58962306a36Sopenharmony_ci * You need to lock device->act_log with lc_try_lock() / lc_unlock() 59062306a36Sopenharmony_ci */ 59162306a36Sopenharmony_civoid drbd_al_shrink(struct drbd_device *device) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci struct lc_element *al_ext; 59462306a36Sopenharmony_ci int i; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags)); 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci for (i = 0; i < device->act_log->nr_elements; i++) { 59962306a36Sopenharmony_ci al_ext = lc_element_by_index(device->act_log, i); 60062306a36Sopenharmony_ci if (al_ext->lc_number == LC_FREE) 60162306a36Sopenharmony_ci continue; 60262306a36Sopenharmony_ci wait_event(device->al_wait, _try_lc_del(device, al_ext)); 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci wake_up(&device->al_wait); 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ciint drbd_al_initialize(struct drbd_device *device, void *buffer) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci struct al_transaction_on_disk *al = buffer; 61162306a36Sopenharmony_ci struct drbd_md *md = &device->ldev->md; 61262306a36Sopenharmony_ci int al_size_4k = md->al_stripes * md->al_stripe_size_4k; 61362306a36Sopenharmony_ci int i; 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci __al_write_transaction(device, al); 61662306a36Sopenharmony_ci /* There may or may not have been a pending transaction. */ 61762306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 61862306a36Sopenharmony_ci lc_committed(device->act_log); 61962306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci /* The rest of the transactions will have an empty "updates" list, and 62262306a36Sopenharmony_ci * are written out only to provide the context, and to initialize the 62362306a36Sopenharmony_ci * on-disk ring buffer. */ 62462306a36Sopenharmony_ci for (i = 1; i < al_size_4k; i++) { 62562306a36Sopenharmony_ci int err = __al_write_transaction(device, al); 62662306a36Sopenharmony_ci if (err) 62762306a36Sopenharmony_ci return err; 62862306a36Sopenharmony_ci } 62962306a36Sopenharmony_ci return 0; 63062306a36Sopenharmony_ci} 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_cistatic const char *drbd_change_sync_fname[] = { 63362306a36Sopenharmony_ci [RECORD_RS_FAILED] = "drbd_rs_failed_io", 63462306a36Sopenharmony_ci [SET_IN_SYNC] = "drbd_set_in_sync", 63562306a36Sopenharmony_ci [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync" 63662306a36Sopenharmony_ci}; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci/* ATTENTION. The AL's extents are 4MB each, while the extents in the 63962306a36Sopenharmony_ci * resync LRU-cache are 16MB each. 64062306a36Sopenharmony_ci * The caller of this function has to hold an get_ldev() reference. 64162306a36Sopenharmony_ci * 64262306a36Sopenharmony_ci * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success), 64362306a36Sopenharmony_ci * potentially pulling in (and recounting the corresponding bits) 64462306a36Sopenharmony_ci * this resync extent into the resync extent lru cache. 64562306a36Sopenharmony_ci * 64662306a36Sopenharmony_ci * Returns whether all bits have been cleared for this resync extent, 64762306a36Sopenharmony_ci * precisely: (rs_left <= rs_failed) 64862306a36Sopenharmony_ci * 64962306a36Sopenharmony_ci * TODO will be obsoleted once we have a caching lru of the on disk bitmap 65062306a36Sopenharmony_ci */ 65162306a36Sopenharmony_cistatic bool update_rs_extent(struct drbd_device *device, 65262306a36Sopenharmony_ci unsigned int enr, int count, 65362306a36Sopenharmony_ci enum update_sync_bits_mode mode) 65462306a36Sopenharmony_ci{ 65562306a36Sopenharmony_ci struct lc_element *e; 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci D_ASSERT(device, atomic_read(&device->local_cnt)); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci /* When setting out-of-sync bits, 66062306a36Sopenharmony_ci * we don't need it cached (lc_find). 66162306a36Sopenharmony_ci * But if it is present in the cache, 66262306a36Sopenharmony_ci * we should update the cached bit count. 66362306a36Sopenharmony_ci * Otherwise, that extent should be in the resync extent lru cache 66462306a36Sopenharmony_ci * already -- or we want to pull it in if necessary -- (lc_get), 66562306a36Sopenharmony_ci * then update and check rs_left and rs_failed. */ 66662306a36Sopenharmony_ci if (mode == SET_OUT_OF_SYNC) 66762306a36Sopenharmony_ci e = lc_find(device->resync, enr); 66862306a36Sopenharmony_ci else 66962306a36Sopenharmony_ci e = lc_get(device->resync, enr); 67062306a36Sopenharmony_ci if (e) { 67162306a36Sopenharmony_ci struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); 67262306a36Sopenharmony_ci if (ext->lce.lc_number == enr) { 67362306a36Sopenharmony_ci if (mode == SET_IN_SYNC) 67462306a36Sopenharmony_ci ext->rs_left -= count; 67562306a36Sopenharmony_ci else if (mode == SET_OUT_OF_SYNC) 67662306a36Sopenharmony_ci ext->rs_left += count; 67762306a36Sopenharmony_ci else 67862306a36Sopenharmony_ci ext->rs_failed += count; 67962306a36Sopenharmony_ci if (ext->rs_left < ext->rs_failed) { 68062306a36Sopenharmony_ci drbd_warn(device, "BAD! enr=%u rs_left=%d " 68162306a36Sopenharmony_ci "rs_failed=%d count=%d cstate=%s\n", 68262306a36Sopenharmony_ci ext->lce.lc_number, ext->rs_left, 68362306a36Sopenharmony_ci ext->rs_failed, count, 68462306a36Sopenharmony_ci drbd_conn_str(device->state.conn)); 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci /* We don't expect to be able to clear more bits 68762306a36Sopenharmony_ci * than have been set when we originally counted 68862306a36Sopenharmony_ci * the set bits to cache that value in ext->rs_left. 68962306a36Sopenharmony_ci * Whatever the reason (disconnect during resync, 69062306a36Sopenharmony_ci * delayed local completion of an application write), 69162306a36Sopenharmony_ci * try to fix it up by recounting here. */ 69262306a36Sopenharmony_ci ext->rs_left = drbd_bm_e_weight(device, enr); 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci } else { 69562306a36Sopenharmony_ci /* Normally this element should be in the cache, 69662306a36Sopenharmony_ci * since drbd_rs_begin_io() pulled it already in. 69762306a36Sopenharmony_ci * 69862306a36Sopenharmony_ci * But maybe an application write finished, and we set 69962306a36Sopenharmony_ci * something outside the resync lru_cache in sync. 70062306a36Sopenharmony_ci */ 70162306a36Sopenharmony_ci int rs_left = drbd_bm_e_weight(device, enr); 70262306a36Sopenharmony_ci if (ext->flags != 0) { 70362306a36Sopenharmony_ci drbd_warn(device, "changing resync lce: %d[%u;%02lx]" 70462306a36Sopenharmony_ci " -> %d[%u;00]\n", 70562306a36Sopenharmony_ci ext->lce.lc_number, ext->rs_left, 70662306a36Sopenharmony_ci ext->flags, enr, rs_left); 70762306a36Sopenharmony_ci ext->flags = 0; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci if (ext->rs_failed) { 71062306a36Sopenharmony_ci drbd_warn(device, "Kicking resync_lru element enr=%u " 71162306a36Sopenharmony_ci "out with rs_failed=%d\n", 71262306a36Sopenharmony_ci ext->lce.lc_number, ext->rs_failed); 71362306a36Sopenharmony_ci } 71462306a36Sopenharmony_ci ext->rs_left = rs_left; 71562306a36Sopenharmony_ci ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0; 71662306a36Sopenharmony_ci /* we don't keep a persistent log of the resync lru, 71762306a36Sopenharmony_ci * we can commit any change right away. */ 71862306a36Sopenharmony_ci lc_committed(device->resync); 71962306a36Sopenharmony_ci } 72062306a36Sopenharmony_ci if (mode != SET_OUT_OF_SYNC) 72162306a36Sopenharmony_ci lc_put(device->resync, &ext->lce); 72262306a36Sopenharmony_ci /* no race, we are within the al_lock! */ 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci if (ext->rs_left <= ext->rs_failed) { 72562306a36Sopenharmony_ci ext->rs_failed = 0; 72662306a36Sopenharmony_ci return true; 72762306a36Sopenharmony_ci } 72862306a36Sopenharmony_ci } else if (mode != SET_OUT_OF_SYNC) { 72962306a36Sopenharmony_ci /* be quiet if lc_find() did not find it. */ 73062306a36Sopenharmony_ci drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", 73162306a36Sopenharmony_ci device->resync_locked, 73262306a36Sopenharmony_ci device->resync->nr_elements, 73362306a36Sopenharmony_ci device->resync->flags); 73462306a36Sopenharmony_ci } 73562306a36Sopenharmony_ci return false; 73662306a36Sopenharmony_ci} 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_civoid drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go) 73962306a36Sopenharmony_ci{ 74062306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 74162306a36Sopenharmony_ci unsigned long now = jiffies; 74262306a36Sopenharmony_ci unsigned long last = device->rs_mark_time[device->rs_last_mark]; 74362306a36Sopenharmony_ci int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS; 74462306a36Sopenharmony_ci if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) { 74562306a36Sopenharmony_ci if (device->rs_mark_left[device->rs_last_mark] != still_to_go && 74662306a36Sopenharmony_ci device->state.conn != C_PAUSED_SYNC_T && 74762306a36Sopenharmony_ci device->state.conn != C_PAUSED_SYNC_S) { 74862306a36Sopenharmony_ci device->rs_mark_time[next] = now; 74962306a36Sopenharmony_ci device->rs_mark_left[next] = still_to_go; 75062306a36Sopenharmony_ci device->rs_last_mark = next; 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci} 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci/* It is called lazy update, so don't do write-out too often. */ 75662306a36Sopenharmony_cistatic bool lazy_bitmap_update_due(struct drbd_device *device) 75762306a36Sopenharmony_ci{ 75862306a36Sopenharmony_ci return time_after(jiffies, device->rs_last_bcast + 2*HZ); 75962306a36Sopenharmony_ci} 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_cistatic void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci if (rs_done) { 76462306a36Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 76562306a36Sopenharmony_ci if (connection->agreed_pro_version <= 95 || 76662306a36Sopenharmony_ci is_sync_target_state(device->state.conn)) 76762306a36Sopenharmony_ci set_bit(RS_DONE, &device->flags); 76862306a36Sopenharmony_ci /* and also set RS_PROGRESS below */ 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci /* Else: rather wait for explicit notification via receive_state, 77162306a36Sopenharmony_ci * to avoid uuids-rotated-too-fast causing full resync 77262306a36Sopenharmony_ci * in next handshake, in case the replication link breaks 77362306a36Sopenharmony_ci * at the most unfortunate time... */ 77462306a36Sopenharmony_ci } else if (!lazy_bitmap_update_due(device)) 77562306a36Sopenharmony_ci return; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci drbd_device_post_work(device, RS_PROGRESS); 77862306a36Sopenharmony_ci} 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_cistatic int update_sync_bits(struct drbd_device *device, 78162306a36Sopenharmony_ci unsigned long sbnr, unsigned long ebnr, 78262306a36Sopenharmony_ci enum update_sync_bits_mode mode) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci /* 78562306a36Sopenharmony_ci * We keep a count of set bits per resync-extent in the ->rs_left 78662306a36Sopenharmony_ci * caching member, so we need to loop and work within the resync extent 78762306a36Sopenharmony_ci * alignment. Typically this loop will execute exactly once. 78862306a36Sopenharmony_ci */ 78962306a36Sopenharmony_ci unsigned long flags; 79062306a36Sopenharmony_ci unsigned long count = 0; 79162306a36Sopenharmony_ci unsigned int cleared = 0; 79262306a36Sopenharmony_ci while (sbnr <= ebnr) { 79362306a36Sopenharmony_ci /* set temporary boundary bit number to last bit number within 79462306a36Sopenharmony_ci * the resync extent of the current start bit number, 79562306a36Sopenharmony_ci * but cap at provided end bit number */ 79662306a36Sopenharmony_ci unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK); 79762306a36Sopenharmony_ci unsigned long c; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (mode == RECORD_RS_FAILED) 80062306a36Sopenharmony_ci /* Only called from drbd_rs_failed_io(), bits 80162306a36Sopenharmony_ci * supposedly still set. Recount, maybe some 80262306a36Sopenharmony_ci * of the bits have been successfully cleared 80362306a36Sopenharmony_ci * by application IO meanwhile. 80462306a36Sopenharmony_ci */ 80562306a36Sopenharmony_ci c = drbd_bm_count_bits(device, sbnr, tbnr); 80662306a36Sopenharmony_ci else if (mode == SET_IN_SYNC) 80762306a36Sopenharmony_ci c = drbd_bm_clear_bits(device, sbnr, tbnr); 80862306a36Sopenharmony_ci else /* if (mode == SET_OUT_OF_SYNC) */ 80962306a36Sopenharmony_ci c = drbd_bm_set_bits(device, sbnr, tbnr); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci if (c) { 81262306a36Sopenharmony_ci spin_lock_irqsave(&device->al_lock, flags); 81362306a36Sopenharmony_ci cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode); 81462306a36Sopenharmony_ci spin_unlock_irqrestore(&device->al_lock, flags); 81562306a36Sopenharmony_ci count += c; 81662306a36Sopenharmony_ci } 81762306a36Sopenharmony_ci sbnr = tbnr + 1; 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci if (count) { 82062306a36Sopenharmony_ci if (mode == SET_IN_SYNC) { 82162306a36Sopenharmony_ci unsigned long still_to_go = drbd_bm_total_weight(device); 82262306a36Sopenharmony_ci bool rs_is_done = (still_to_go <= device->rs_failed); 82362306a36Sopenharmony_ci drbd_advance_rs_marks(first_peer_device(device), still_to_go); 82462306a36Sopenharmony_ci if (cleared || rs_is_done) 82562306a36Sopenharmony_ci maybe_schedule_on_disk_bitmap_update(device, rs_is_done); 82662306a36Sopenharmony_ci } else if (mode == RECORD_RS_FAILED) 82762306a36Sopenharmony_ci device->rs_failed += count; 82862306a36Sopenharmony_ci wake_up(&device->al_wait); 82962306a36Sopenharmony_ci } 83062306a36Sopenharmony_ci return count; 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_cistatic bool plausible_request_size(int size) 83462306a36Sopenharmony_ci{ 83562306a36Sopenharmony_ci return size > 0 83662306a36Sopenharmony_ci && size <= DRBD_MAX_BATCH_BIO_SIZE 83762306a36Sopenharmony_ci && IS_ALIGNED(size, 512); 83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci/* clear the bit corresponding to the piece of storage in question: 84162306a36Sopenharmony_ci * size byte of data starting from sector. Only clear a bits of the affected 84262306a36Sopenharmony_ci * one ore more _aligned_ BM_BLOCK_SIZE blocks. 84362306a36Sopenharmony_ci * 84462306a36Sopenharmony_ci * called by worker on C_SYNC_TARGET and receiver on SyncSource. 84562306a36Sopenharmony_ci * 84662306a36Sopenharmony_ci */ 84762306a36Sopenharmony_ciint __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, 84862306a36Sopenharmony_ci enum update_sync_bits_mode mode) 84962306a36Sopenharmony_ci{ 85062306a36Sopenharmony_ci /* Is called from worker and receiver context _only_ */ 85162306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 85262306a36Sopenharmony_ci unsigned long sbnr, ebnr, lbnr; 85362306a36Sopenharmony_ci unsigned long count = 0; 85462306a36Sopenharmony_ci sector_t esector, nr_sectors; 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci /* This would be an empty REQ_PREFLUSH, be silent. */ 85762306a36Sopenharmony_ci if ((mode == SET_OUT_OF_SYNC) && size == 0) 85862306a36Sopenharmony_ci return 0; 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (!plausible_request_size(size)) { 86162306a36Sopenharmony_ci drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", 86262306a36Sopenharmony_ci drbd_change_sync_fname[mode], 86362306a36Sopenharmony_ci (unsigned long long)sector, size); 86462306a36Sopenharmony_ci return 0; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci if (!get_ldev(device)) 86862306a36Sopenharmony_ci return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci nr_sectors = get_capacity(device->vdisk); 87162306a36Sopenharmony_ci esector = sector + (size >> 9) - 1; 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci if (!expect(device, sector < nr_sectors)) 87462306a36Sopenharmony_ci goto out; 87562306a36Sopenharmony_ci if (!expect(device, esector < nr_sectors)) 87662306a36Sopenharmony_ci esector = nr_sectors - 1; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci lbnr = BM_SECT_TO_BIT(nr_sectors-1); 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci if (mode == SET_IN_SYNC) { 88162306a36Sopenharmony_ci /* Round up start sector, round down end sector. We make sure 88262306a36Sopenharmony_ci * we only clear full, aligned, BM_BLOCK_SIZE blocks. */ 88362306a36Sopenharmony_ci if (unlikely(esector < BM_SECT_PER_BIT-1)) 88462306a36Sopenharmony_ci goto out; 88562306a36Sopenharmony_ci if (unlikely(esector == (nr_sectors-1))) 88662306a36Sopenharmony_ci ebnr = lbnr; 88762306a36Sopenharmony_ci else 88862306a36Sopenharmony_ci ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); 88962306a36Sopenharmony_ci sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); 89062306a36Sopenharmony_ci } else { 89162306a36Sopenharmony_ci /* We set it out of sync, or record resync failure. 89262306a36Sopenharmony_ci * Should not round anything here. */ 89362306a36Sopenharmony_ci sbnr = BM_SECT_TO_BIT(sector); 89462306a36Sopenharmony_ci ebnr = BM_SECT_TO_BIT(esector); 89562306a36Sopenharmony_ci } 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci count = update_sync_bits(device, sbnr, ebnr, mode); 89862306a36Sopenharmony_ciout: 89962306a36Sopenharmony_ci put_ldev(device); 90062306a36Sopenharmony_ci return count; 90162306a36Sopenharmony_ci} 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_cistatic 90462306a36Sopenharmony_cistruct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci struct lc_element *e; 90762306a36Sopenharmony_ci struct bm_extent *bm_ext; 90862306a36Sopenharmony_ci int wakeup = 0; 90962306a36Sopenharmony_ci unsigned long rs_flags; 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 91262306a36Sopenharmony_ci if (device->resync_locked > device->resync->nr_elements/2) { 91362306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 91462306a36Sopenharmony_ci return NULL; 91562306a36Sopenharmony_ci } 91662306a36Sopenharmony_ci e = lc_get(device->resync, enr); 91762306a36Sopenharmony_ci bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 91862306a36Sopenharmony_ci if (bm_ext) { 91962306a36Sopenharmony_ci if (bm_ext->lce.lc_number != enr) { 92062306a36Sopenharmony_ci bm_ext->rs_left = drbd_bm_e_weight(device, enr); 92162306a36Sopenharmony_ci bm_ext->rs_failed = 0; 92262306a36Sopenharmony_ci lc_committed(device->resync); 92362306a36Sopenharmony_ci wakeup = 1; 92462306a36Sopenharmony_ci } 92562306a36Sopenharmony_ci if (bm_ext->lce.refcnt == 1) 92662306a36Sopenharmony_ci device->resync_locked++; 92762306a36Sopenharmony_ci set_bit(BME_NO_WRITES, &bm_ext->flags); 92862306a36Sopenharmony_ci } 92962306a36Sopenharmony_ci rs_flags = device->resync->flags; 93062306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 93162306a36Sopenharmony_ci if (wakeup) 93262306a36Sopenharmony_ci wake_up(&device->al_wait); 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci if (!bm_ext) { 93562306a36Sopenharmony_ci if (rs_flags & LC_STARVING) 93662306a36Sopenharmony_ci drbd_warn(device, "Have to wait for element" 93762306a36Sopenharmony_ci " (resync LRU too small?)\n"); 93862306a36Sopenharmony_ci BUG_ON(rs_flags & LC_LOCKED); 93962306a36Sopenharmony_ci } 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci return bm_ext; 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_cistatic int _is_in_al(struct drbd_device *device, unsigned int enr) 94562306a36Sopenharmony_ci{ 94662306a36Sopenharmony_ci int rv; 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 94962306a36Sopenharmony_ci rv = lc_is_used(device->act_log, enr); 95062306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci return rv; 95362306a36Sopenharmony_ci} 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci/** 95662306a36Sopenharmony_ci * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED 95762306a36Sopenharmony_ci * @device: DRBD device. 95862306a36Sopenharmony_ci * @sector: The sector number. 95962306a36Sopenharmony_ci * 96062306a36Sopenharmony_ci * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted. 96162306a36Sopenharmony_ci */ 96262306a36Sopenharmony_ciint drbd_rs_begin_io(struct drbd_device *device, sector_t sector) 96362306a36Sopenharmony_ci{ 96462306a36Sopenharmony_ci unsigned int enr = BM_SECT_TO_EXT(sector); 96562306a36Sopenharmony_ci struct bm_extent *bm_ext; 96662306a36Sopenharmony_ci int i, sig; 96762306a36Sopenharmony_ci bool sa; 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ciretry: 97062306a36Sopenharmony_ci sig = wait_event_interruptible(device->al_wait, 97162306a36Sopenharmony_ci (bm_ext = _bme_get(device, enr))); 97262306a36Sopenharmony_ci if (sig) 97362306a36Sopenharmony_ci return -EINTR; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci if (test_bit(BME_LOCKED, &bm_ext->flags)) 97662306a36Sopenharmony_ci return 0; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci /* step aside only while we are above c-min-rate; unless disabled. */ 97962306a36Sopenharmony_ci sa = drbd_rs_c_min_rate_throttle(device); 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { 98262306a36Sopenharmony_ci sig = wait_event_interruptible(device->al_wait, 98362306a36Sopenharmony_ci !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || 98462306a36Sopenharmony_ci (sa && test_bit(BME_PRIORITY, &bm_ext->flags))); 98562306a36Sopenharmony_ci 98662306a36Sopenharmony_ci if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) { 98762306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 98862306a36Sopenharmony_ci if (lc_put(device->resync, &bm_ext->lce) == 0) { 98962306a36Sopenharmony_ci bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ 99062306a36Sopenharmony_ci device->resync_locked--; 99162306a36Sopenharmony_ci wake_up(&device->al_wait); 99262306a36Sopenharmony_ci } 99362306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 99462306a36Sopenharmony_ci if (sig) 99562306a36Sopenharmony_ci return -EINTR; 99662306a36Sopenharmony_ci if (schedule_timeout_interruptible(HZ/10)) 99762306a36Sopenharmony_ci return -EINTR; 99862306a36Sopenharmony_ci goto retry; 99962306a36Sopenharmony_ci } 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci set_bit(BME_LOCKED, &bm_ext->flags); 100262306a36Sopenharmony_ci return 0; 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci/** 100662306a36Sopenharmony_ci * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep 100762306a36Sopenharmony_ci * @device: DRBD device. 100862306a36Sopenharmony_ci * @sector: The sector number. 100962306a36Sopenharmony_ci * 101062306a36Sopenharmony_ci * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then 101162306a36Sopenharmony_ci * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN 101262306a36Sopenharmony_ci * if there is still application IO going on in this area. 101362306a36Sopenharmony_ci */ 101462306a36Sopenharmony_ciint drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector) 101562306a36Sopenharmony_ci{ 101662306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 101762306a36Sopenharmony_ci unsigned int enr = BM_SECT_TO_EXT(sector); 101862306a36Sopenharmony_ci const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; 101962306a36Sopenharmony_ci struct lc_element *e; 102062306a36Sopenharmony_ci struct bm_extent *bm_ext; 102162306a36Sopenharmony_ci int i; 102262306a36Sopenharmony_ci bool throttle = drbd_rs_should_slow_down(peer_device, sector, true); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, 102562306a36Sopenharmony_ci * not yet BME_LOCKED) extent needs to be kicked out explicitly if we 102662306a36Sopenharmony_ci * need to throttle. There is at most one such half-locked extent, 102762306a36Sopenharmony_ci * which is remembered in resync_wenr. */ 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci if (throttle && device->resync_wenr != enr) 103062306a36Sopenharmony_ci return -EAGAIN; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 103362306a36Sopenharmony_ci if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { 103462306a36Sopenharmony_ci /* in case you have very heavy scattered io, it may 103562306a36Sopenharmony_ci * stall the syncer undefined if we give up the ref count 103662306a36Sopenharmony_ci * when we try again and requeue. 103762306a36Sopenharmony_ci * 103862306a36Sopenharmony_ci * if we don't give up the refcount, but the next time 103962306a36Sopenharmony_ci * we are scheduled this extent has been "synced" by new 104062306a36Sopenharmony_ci * application writes, we'd miss the lc_put on the 104162306a36Sopenharmony_ci * extent we keep the refcount on. 104262306a36Sopenharmony_ci * so we remembered which extent we had to try again, and 104362306a36Sopenharmony_ci * if the next requested one is something else, we do 104462306a36Sopenharmony_ci * the lc_put here... 104562306a36Sopenharmony_ci * we also have to wake_up 104662306a36Sopenharmony_ci */ 104762306a36Sopenharmony_ci e = lc_find(device->resync, device->resync_wenr); 104862306a36Sopenharmony_ci bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 104962306a36Sopenharmony_ci if (bm_ext) { 105062306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); 105162306a36Sopenharmony_ci D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); 105262306a36Sopenharmony_ci clear_bit(BME_NO_WRITES, &bm_ext->flags); 105362306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 105462306a36Sopenharmony_ci if (lc_put(device->resync, &bm_ext->lce) == 0) { 105562306a36Sopenharmony_ci bm_ext->flags = 0; 105662306a36Sopenharmony_ci device->resync_locked--; 105762306a36Sopenharmony_ci } 105862306a36Sopenharmony_ci wake_up(&device->al_wait); 105962306a36Sopenharmony_ci } else { 106062306a36Sopenharmony_ci drbd_alert(device, "LOGIC BUG\n"); 106162306a36Sopenharmony_ci } 106262306a36Sopenharmony_ci } 106362306a36Sopenharmony_ci /* TRY. */ 106462306a36Sopenharmony_ci e = lc_try_get(device->resync, enr); 106562306a36Sopenharmony_ci bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 106662306a36Sopenharmony_ci if (bm_ext) { 106762306a36Sopenharmony_ci if (test_bit(BME_LOCKED, &bm_ext->flags)) 106862306a36Sopenharmony_ci goto proceed; 106962306a36Sopenharmony_ci if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) { 107062306a36Sopenharmony_ci device->resync_locked++; 107162306a36Sopenharmony_ci } else { 107262306a36Sopenharmony_ci /* we did set the BME_NO_WRITES, 107362306a36Sopenharmony_ci * but then could not set BME_LOCKED, 107462306a36Sopenharmony_ci * so we tried again. 107562306a36Sopenharmony_ci * drop the extra reference. */ 107662306a36Sopenharmony_ci bm_ext->lce.refcnt--; 107762306a36Sopenharmony_ci D_ASSERT(device, bm_ext->lce.refcnt > 0); 107862306a36Sopenharmony_ci } 107962306a36Sopenharmony_ci goto check_al; 108062306a36Sopenharmony_ci } else { 108162306a36Sopenharmony_ci /* do we rather want to try later? */ 108262306a36Sopenharmony_ci if (device->resync_locked > device->resync->nr_elements-3) 108362306a36Sopenharmony_ci goto try_again; 108462306a36Sopenharmony_ci /* Do or do not. There is no try. -- Yoda */ 108562306a36Sopenharmony_ci e = lc_get(device->resync, enr); 108662306a36Sopenharmony_ci bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 108762306a36Sopenharmony_ci if (!bm_ext) { 108862306a36Sopenharmony_ci const unsigned long rs_flags = device->resync->flags; 108962306a36Sopenharmony_ci if (rs_flags & LC_STARVING) 109062306a36Sopenharmony_ci drbd_warn(device, "Have to wait for element" 109162306a36Sopenharmony_ci " (resync LRU too small?)\n"); 109262306a36Sopenharmony_ci BUG_ON(rs_flags & LC_LOCKED); 109362306a36Sopenharmony_ci goto try_again; 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci if (bm_ext->lce.lc_number != enr) { 109662306a36Sopenharmony_ci bm_ext->rs_left = drbd_bm_e_weight(device, enr); 109762306a36Sopenharmony_ci bm_ext->rs_failed = 0; 109862306a36Sopenharmony_ci lc_committed(device->resync); 109962306a36Sopenharmony_ci wake_up(&device->al_wait); 110062306a36Sopenharmony_ci D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0); 110162306a36Sopenharmony_ci } 110262306a36Sopenharmony_ci set_bit(BME_NO_WRITES, &bm_ext->flags); 110362306a36Sopenharmony_ci D_ASSERT(device, bm_ext->lce.refcnt == 1); 110462306a36Sopenharmony_ci device->resync_locked++; 110562306a36Sopenharmony_ci goto check_al; 110662306a36Sopenharmony_ci } 110762306a36Sopenharmony_cicheck_al: 110862306a36Sopenharmony_ci for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { 110962306a36Sopenharmony_ci if (lc_is_used(device->act_log, al_enr+i)) 111062306a36Sopenharmony_ci goto try_again; 111162306a36Sopenharmony_ci } 111262306a36Sopenharmony_ci set_bit(BME_LOCKED, &bm_ext->flags); 111362306a36Sopenharmony_ciproceed: 111462306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 111562306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 111662306a36Sopenharmony_ci return 0; 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_citry_again: 111962306a36Sopenharmony_ci if (bm_ext) { 112062306a36Sopenharmony_ci if (throttle) { 112162306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); 112262306a36Sopenharmony_ci D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); 112362306a36Sopenharmony_ci clear_bit(BME_NO_WRITES, &bm_ext->flags); 112462306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 112562306a36Sopenharmony_ci if (lc_put(device->resync, &bm_ext->lce) == 0) { 112662306a36Sopenharmony_ci bm_ext->flags = 0; 112762306a36Sopenharmony_ci device->resync_locked--; 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci wake_up(&device->al_wait); 113062306a36Sopenharmony_ci } else 113162306a36Sopenharmony_ci device->resync_wenr = enr; 113262306a36Sopenharmony_ci } 113362306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 113462306a36Sopenharmony_ci return -EAGAIN; 113562306a36Sopenharmony_ci} 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_civoid drbd_rs_complete_io(struct drbd_device *device, sector_t sector) 113862306a36Sopenharmony_ci{ 113962306a36Sopenharmony_ci unsigned int enr = BM_SECT_TO_EXT(sector); 114062306a36Sopenharmony_ci struct lc_element *e; 114162306a36Sopenharmony_ci struct bm_extent *bm_ext; 114262306a36Sopenharmony_ci unsigned long flags; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci spin_lock_irqsave(&device->al_lock, flags); 114562306a36Sopenharmony_ci e = lc_find(device->resync, enr); 114662306a36Sopenharmony_ci bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; 114762306a36Sopenharmony_ci if (!bm_ext) { 114862306a36Sopenharmony_ci spin_unlock_irqrestore(&device->al_lock, flags); 114962306a36Sopenharmony_ci if (drbd_ratelimit()) 115062306a36Sopenharmony_ci drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n"); 115162306a36Sopenharmony_ci return; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci if (bm_ext->lce.refcnt == 0) { 115562306a36Sopenharmony_ci spin_unlock_irqrestore(&device->al_lock, flags); 115662306a36Sopenharmony_ci drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, " 115762306a36Sopenharmony_ci "but refcnt is 0!?\n", 115862306a36Sopenharmony_ci (unsigned long long)sector, enr); 115962306a36Sopenharmony_ci return; 116062306a36Sopenharmony_ci } 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci if (lc_put(device->resync, &bm_ext->lce) == 0) { 116362306a36Sopenharmony_ci bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */ 116462306a36Sopenharmony_ci device->resync_locked--; 116562306a36Sopenharmony_ci wake_up(&device->al_wait); 116662306a36Sopenharmony_ci } 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci spin_unlock_irqrestore(&device->al_lock, flags); 116962306a36Sopenharmony_ci} 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci/** 117262306a36Sopenharmony_ci * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED) 117362306a36Sopenharmony_ci * @device: DRBD device. 117462306a36Sopenharmony_ci */ 117562306a36Sopenharmony_civoid drbd_rs_cancel_all(struct drbd_device *device) 117662306a36Sopenharmony_ci{ 117762306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */ 118062306a36Sopenharmony_ci lc_reset(device->resync); 118162306a36Sopenharmony_ci put_ldev(device); 118262306a36Sopenharmony_ci } 118362306a36Sopenharmony_ci device->resync_locked = 0; 118462306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 118562306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 118662306a36Sopenharmony_ci wake_up(&device->al_wait); 118762306a36Sopenharmony_ci} 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci/** 119062306a36Sopenharmony_ci * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU 119162306a36Sopenharmony_ci * @device: DRBD device. 119262306a36Sopenharmony_ci * 119362306a36Sopenharmony_ci * Returns 0 upon success, -EAGAIN if at least one reference count was 119462306a36Sopenharmony_ci * not zero. 119562306a36Sopenharmony_ci */ 119662306a36Sopenharmony_ciint drbd_rs_del_all(struct drbd_device *device) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci struct lc_element *e; 119962306a36Sopenharmony_ci struct bm_extent *bm_ext; 120062306a36Sopenharmony_ci int i; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { 120562306a36Sopenharmony_ci /* ok, ->resync is there. */ 120662306a36Sopenharmony_ci for (i = 0; i < device->resync->nr_elements; i++) { 120762306a36Sopenharmony_ci e = lc_element_by_index(device->resync, i); 120862306a36Sopenharmony_ci bm_ext = lc_entry(e, struct bm_extent, lce); 120962306a36Sopenharmony_ci if (bm_ext->lce.lc_number == LC_FREE) 121062306a36Sopenharmony_ci continue; 121162306a36Sopenharmony_ci if (bm_ext->lce.lc_number == device->resync_wenr) { 121262306a36Sopenharmony_ci drbd_info(device, "dropping %u in drbd_rs_del_all, apparently" 121362306a36Sopenharmony_ci " got 'synced' by application io\n", 121462306a36Sopenharmony_ci device->resync_wenr); 121562306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); 121662306a36Sopenharmony_ci D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); 121762306a36Sopenharmony_ci clear_bit(BME_NO_WRITES, &bm_ext->flags); 121862306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 121962306a36Sopenharmony_ci lc_put(device->resync, &bm_ext->lce); 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci if (bm_ext->lce.refcnt != 0) { 122262306a36Sopenharmony_ci drbd_info(device, "Retrying drbd_rs_del_all() later. " 122362306a36Sopenharmony_ci "refcnt=%d\n", bm_ext->lce.refcnt); 122462306a36Sopenharmony_ci put_ldev(device); 122562306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 122662306a36Sopenharmony_ci return -EAGAIN; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); 122962306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags)); 123062306a36Sopenharmony_ci lc_del(device->resync, &bm_ext->lce); 123162306a36Sopenharmony_ci } 123262306a36Sopenharmony_ci D_ASSERT(device, device->resync->used == 0); 123362306a36Sopenharmony_ci put_ldev(device); 123462306a36Sopenharmony_ci } 123562306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 123662306a36Sopenharmony_ci wake_up(&device->al_wait); 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci return 0; 123962306a36Sopenharmony_ci} 1240