18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci drbd_worker.c 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 88c2ecf20Sopenharmony_ci Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 98c2ecf20Sopenharmony_ci Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci*/ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/module.h> 158c2ecf20Sopenharmony_ci#include <linux/drbd.h> 168c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 178c2ecf20Sopenharmony_ci#include <linux/wait.h> 188c2ecf20Sopenharmony_ci#include <linux/mm.h> 198c2ecf20Sopenharmony_ci#include <linux/memcontrol.h> 208c2ecf20Sopenharmony_ci#include <linux/mm_inline.h> 218c2ecf20Sopenharmony_ci#include <linux/slab.h> 228c2ecf20Sopenharmony_ci#include <linux/random.h> 238c2ecf20Sopenharmony_ci#include <linux/string.h> 248c2ecf20Sopenharmony_ci#include <linux/scatterlist.h> 258c2ecf20Sopenharmony_ci#include <linux/part_stat.h> 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci#include "drbd_int.h" 288c2ecf20Sopenharmony_ci#include "drbd_protocol.h" 298c2ecf20Sopenharmony_ci#include "drbd_req.h" 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_cistatic int make_ov_request(struct drbd_device *, int); 328c2ecf20Sopenharmony_cistatic int make_resync_request(struct drbd_device *, int); 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci/* endio handlers: 358c2ecf20Sopenharmony_ci * drbd_md_endio (defined here) 368c2ecf20Sopenharmony_ci * drbd_request_endio (defined here) 378c2ecf20Sopenharmony_ci * drbd_peer_request_endio (defined here) 388c2ecf20Sopenharmony_ci * drbd_bm_endio (defined in drbd_bitmap.c) 398c2ecf20Sopenharmony_ci * 408c2ecf20Sopenharmony_ci * For all these callbacks, note the following: 418c2ecf20Sopenharmony_ci * The callbacks will be called in irq context by the IDE drivers, 428c2ecf20Sopenharmony_ci * and in Softirqs/Tasklets/BH context by the SCSI drivers. 438c2ecf20Sopenharmony_ci * Try to get the locking right :) 448c2ecf20Sopenharmony_ci * 458c2ecf20Sopenharmony_ci */ 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci/* used for synchronous meta data and bitmap IO 488c2ecf20Sopenharmony_ci * submitted by drbd_md_sync_page_io() 498c2ecf20Sopenharmony_ci */ 508c2ecf20Sopenharmony_civoid drbd_md_endio(struct bio *bio) 518c2ecf20Sopenharmony_ci{ 528c2ecf20Sopenharmony_ci struct drbd_device *device; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci device = bio->bi_private; 558c2ecf20Sopenharmony_ci device->md_io.error = blk_status_to_errno(bio->bi_status); 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci /* special case: drbd_md_read() during drbd_adm_attach() */ 588c2ecf20Sopenharmony_ci if (device->ldev) 598c2ecf20Sopenharmony_ci put_ldev(device); 608c2ecf20Sopenharmony_ci bio_put(bio); 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 638c2ecf20Sopenharmony_ci * to timeout on the lower level device, and eventually detach from it. 648c2ecf20Sopenharmony_ci * If this io completion runs after that timeout expired, this 658c2ecf20Sopenharmony_ci * drbd_md_put_buffer() may allow us to finally try and re-attach. 668c2ecf20Sopenharmony_ci * During normal operation, this only puts that extra reference 678c2ecf20Sopenharmony_ci * down to 1 again. 688c2ecf20Sopenharmony_ci * Make sure we first drop the reference, and only then signal 698c2ecf20Sopenharmony_ci * completion, or we may (in drbd_al_read_log()) cycle so fast into the 708c2ecf20Sopenharmony_ci * next drbd_md_sync_page_io(), that we trigger the 718c2ecf20Sopenharmony_ci * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 728c2ecf20Sopenharmony_ci */ 738c2ecf20Sopenharmony_ci drbd_md_put_buffer(device); 748c2ecf20Sopenharmony_ci device->md_io.done = 1; 758c2ecf20Sopenharmony_ci wake_up(&device->misc_wait); 768c2ecf20Sopenharmony_ci} 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci/* reads on behalf of the partner, 798c2ecf20Sopenharmony_ci * "submitted" by the receiver 808c2ecf20Sopenharmony_ci */ 818c2ecf20Sopenharmony_cistatic void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) 828c2ecf20Sopenharmony_ci{ 838c2ecf20Sopenharmony_ci unsigned long flags = 0; 848c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 858c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci spin_lock_irqsave(&device->resource->req_lock, flags); 888c2ecf20Sopenharmony_ci device->read_cnt += peer_req->i.size >> 9; 898c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 908c2ecf20Sopenharmony_ci if (list_empty(&device->read_ee)) 918c2ecf20Sopenharmony_ci wake_up(&device->ee_wait); 928c2ecf20Sopenharmony_ci if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) 938c2ecf20Sopenharmony_ci __drbd_chk_io_error(device, DRBD_READ_ERROR); 948c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&device->resource->req_lock, flags); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); 978c2ecf20Sopenharmony_ci put_ldev(device); 988c2ecf20Sopenharmony_ci} 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci/* writes on behalf of the partner, or resync writes, 1018c2ecf20Sopenharmony_ci * "submitted" by the receiver, final stage. */ 1028c2ecf20Sopenharmony_civoid drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci unsigned long flags = 0; 1058c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 1068c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 1078c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_device->connection; 1088c2ecf20Sopenharmony_ci struct drbd_interval i; 1098c2ecf20Sopenharmony_ci int do_wake; 1108c2ecf20Sopenharmony_ci u64 block_id; 1118c2ecf20Sopenharmony_ci int do_al_complete_io; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci /* after we moved peer_req to done_ee, 1148c2ecf20Sopenharmony_ci * we may no longer access it, 1158c2ecf20Sopenharmony_ci * it may be freed/reused already! 1168c2ecf20Sopenharmony_ci * (as soon as we release the req_lock) */ 1178c2ecf20Sopenharmony_ci i = peer_req->i; 1188c2ecf20Sopenharmony_ci do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 1198c2ecf20Sopenharmony_ci block_id = peer_req->block_id; 1208c2ecf20Sopenharmony_ci peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci if (peer_req->flags & EE_WAS_ERROR) { 1238c2ecf20Sopenharmony_ci /* In protocol != C, we usually do not send write acks. 1248c2ecf20Sopenharmony_ci * In case of a write error, send the neg ack anyways. */ 1258c2ecf20Sopenharmony_ci if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) 1268c2ecf20Sopenharmony_ci inc_unacked(device); 1278c2ecf20Sopenharmony_ci drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 1288c2ecf20Sopenharmony_ci } 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci spin_lock_irqsave(&device->resource->req_lock, flags); 1318c2ecf20Sopenharmony_ci device->writ_cnt += peer_req->i.size >> 9; 1328c2ecf20Sopenharmony_ci list_move_tail(&peer_req->w.list, &device->done_ee); 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci /* 1358c2ecf20Sopenharmony_ci * Do not remove from the write_requests tree here: we did not send the 1368c2ecf20Sopenharmony_ci * Ack yet and did not wake possibly waiting conflicting requests. 1378c2ecf20Sopenharmony_ci * Removed from the tree from "drbd_process_done_ee" within the 1388c2ecf20Sopenharmony_ci * appropriate dw.cb (e_end_block/e_end_resync_block) or from 1398c2ecf20Sopenharmony_ci * _drbd_clear_done_ee. 1408c2ecf20Sopenharmony_ci */ 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci /* FIXME do we want to detach for failed REQ_OP_DISCARD? 1458c2ecf20Sopenharmony_ci * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */ 1468c2ecf20Sopenharmony_ci if (peer_req->flags & EE_WAS_ERROR) 1478c2ecf20Sopenharmony_ci __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci if (connection->cstate >= C_WF_REPORT_PARAMS) { 1508c2ecf20Sopenharmony_ci kref_get(&device->kref); /* put is in drbd_send_acks_wf() */ 1518c2ecf20Sopenharmony_ci if (!queue_work(connection->ack_sender, &peer_device->send_acks_work)) 1528c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 1538c2ecf20Sopenharmony_ci } 1548c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&device->resource->req_lock, flags); 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci if (block_id == ID_SYNCER) 1578c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, i.sector); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci if (do_wake) 1608c2ecf20Sopenharmony_ci wake_up(&device->ee_wait); 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci if (do_al_complete_io) 1638c2ecf20Sopenharmony_ci drbd_al_complete_io(device, &i); 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci put_ldev(device); 1668c2ecf20Sopenharmony_ci} 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci/* writes on behalf of the partner, or resync writes, 1698c2ecf20Sopenharmony_ci * "submitted" by the receiver. 1708c2ecf20Sopenharmony_ci */ 1718c2ecf20Sopenharmony_civoid drbd_peer_request_endio(struct bio *bio) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = bio->bi_private; 1748c2ecf20Sopenharmony_ci struct drbd_device *device = peer_req->peer_device->device; 1758c2ecf20Sopenharmony_ci bool is_write = bio_data_dir(bio) == WRITE; 1768c2ecf20Sopenharmony_ci bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || 1778c2ecf20Sopenharmony_ci bio_op(bio) == REQ_OP_DISCARD; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci if (bio->bi_status && __ratelimit(&drbd_ratelimit_state)) 1808c2ecf20Sopenharmony_ci drbd_warn(device, "%s: error=%d s=%llus\n", 1818c2ecf20Sopenharmony_ci is_write ? (is_discard ? "discard" : "write") 1828c2ecf20Sopenharmony_ci : "read", bio->bi_status, 1838c2ecf20Sopenharmony_ci (unsigned long long)peer_req->i.sector); 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci if (bio->bi_status) 1868c2ecf20Sopenharmony_ci set_bit(__EE_WAS_ERROR, &peer_req->flags); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci bio_put(bio); /* no need for the bio anymore */ 1898c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&peer_req->pending_bios)) { 1908c2ecf20Sopenharmony_ci if (is_write) 1918c2ecf20Sopenharmony_ci drbd_endio_write_sec_final(peer_req); 1928c2ecf20Sopenharmony_ci else 1938c2ecf20Sopenharmony_ci drbd_endio_read_sec_final(peer_req); 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_cistatic void 1988c2ecf20Sopenharmony_cidrbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n", 2018c2ecf20Sopenharmony_ci device->minor, device->resource->name, device->vnr); 2028c2ecf20Sopenharmony_ci} 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci/* read, readA or write requests on R_PRIMARY coming from drbd_make_request 2058c2ecf20Sopenharmony_ci */ 2068c2ecf20Sopenharmony_civoid drbd_request_endio(struct bio *bio) 2078c2ecf20Sopenharmony_ci{ 2088c2ecf20Sopenharmony_ci unsigned long flags; 2098c2ecf20Sopenharmony_ci struct drbd_request *req = bio->bi_private; 2108c2ecf20Sopenharmony_ci struct drbd_device *device = req->device; 2118c2ecf20Sopenharmony_ci struct bio_and_error m; 2128c2ecf20Sopenharmony_ci enum drbd_req_event what; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci /* If this request was aborted locally before, 2158c2ecf20Sopenharmony_ci * but now was completed "successfully", 2168c2ecf20Sopenharmony_ci * chances are that this caused arbitrary data corruption. 2178c2ecf20Sopenharmony_ci * 2188c2ecf20Sopenharmony_ci * "aborting" requests, or force-detaching the disk, is intended for 2198c2ecf20Sopenharmony_ci * completely blocked/hung local backing devices which do no longer 2208c2ecf20Sopenharmony_ci * complete requests at all, not even do error completions. In this 2218c2ecf20Sopenharmony_ci * situation, usually a hard-reset and failover is the only way out. 2228c2ecf20Sopenharmony_ci * 2238c2ecf20Sopenharmony_ci * By "aborting", basically faking a local error-completion, 2248c2ecf20Sopenharmony_ci * we allow for a more graceful swichover by cleanly migrating services. 2258c2ecf20Sopenharmony_ci * Still the affected node has to be rebooted "soon". 2268c2ecf20Sopenharmony_ci * 2278c2ecf20Sopenharmony_ci * By completing these requests, we allow the upper layers to re-use 2288c2ecf20Sopenharmony_ci * the associated data pages. 2298c2ecf20Sopenharmony_ci * 2308c2ecf20Sopenharmony_ci * If later the local backing device "recovers", and now DMAs some data 2318c2ecf20Sopenharmony_ci * from disk into the original request pages, in the best case it will 2328c2ecf20Sopenharmony_ci * just put random data into unused pages; but typically it will corrupt 2338c2ecf20Sopenharmony_ci * meanwhile completely unrelated data, causing all sorts of damage. 2348c2ecf20Sopenharmony_ci * 2358c2ecf20Sopenharmony_ci * Which means delayed successful completion, 2368c2ecf20Sopenharmony_ci * especially for READ requests, 2378c2ecf20Sopenharmony_ci * is a reason to panic(). 2388c2ecf20Sopenharmony_ci * 2398c2ecf20Sopenharmony_ci * We assume that a delayed *error* completion is OK, 2408c2ecf20Sopenharmony_ci * though we still will complain noisily about it. 2418c2ecf20Sopenharmony_ci */ 2428c2ecf20Sopenharmony_ci if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { 2438c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 2448c2ecf20Sopenharmony_ci drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci if (!bio->bi_status) 2478c2ecf20Sopenharmony_ci drbd_panic_after_delayed_completion_of_aborted_request(device); 2488c2ecf20Sopenharmony_ci } 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci /* to avoid recursion in __req_mod */ 2518c2ecf20Sopenharmony_ci if (unlikely(bio->bi_status)) { 2528c2ecf20Sopenharmony_ci switch (bio_op(bio)) { 2538c2ecf20Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 2548c2ecf20Sopenharmony_ci case REQ_OP_DISCARD: 2558c2ecf20Sopenharmony_ci if (bio->bi_status == BLK_STS_NOTSUPP) 2568c2ecf20Sopenharmony_ci what = DISCARD_COMPLETED_NOTSUPP; 2578c2ecf20Sopenharmony_ci else 2588c2ecf20Sopenharmony_ci what = DISCARD_COMPLETED_WITH_ERROR; 2598c2ecf20Sopenharmony_ci break; 2608c2ecf20Sopenharmony_ci case REQ_OP_READ: 2618c2ecf20Sopenharmony_ci if (bio->bi_opf & REQ_RAHEAD) 2628c2ecf20Sopenharmony_ci what = READ_AHEAD_COMPLETED_WITH_ERROR; 2638c2ecf20Sopenharmony_ci else 2648c2ecf20Sopenharmony_ci what = READ_COMPLETED_WITH_ERROR; 2658c2ecf20Sopenharmony_ci break; 2668c2ecf20Sopenharmony_ci default: 2678c2ecf20Sopenharmony_ci what = WRITE_COMPLETED_WITH_ERROR; 2688c2ecf20Sopenharmony_ci break; 2698c2ecf20Sopenharmony_ci } 2708c2ecf20Sopenharmony_ci } else { 2718c2ecf20Sopenharmony_ci what = COMPLETED_OK; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); 2758c2ecf20Sopenharmony_ci bio_put(bio); 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci /* not req_mod(), we need irqsave here! */ 2788c2ecf20Sopenharmony_ci spin_lock_irqsave(&device->resource->req_lock, flags); 2798c2ecf20Sopenharmony_ci __req_mod(req, what, &m); 2808c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&device->resource->req_lock, flags); 2818c2ecf20Sopenharmony_ci put_ldev(device); 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci if (m.bio) 2848c2ecf20Sopenharmony_ci complete_master_bio(device, &m); 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_civoid drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest) 2888c2ecf20Sopenharmony_ci{ 2898c2ecf20Sopenharmony_ci SHASH_DESC_ON_STACK(desc, tfm); 2908c2ecf20Sopenharmony_ci struct page *page = peer_req->pages; 2918c2ecf20Sopenharmony_ci struct page *tmp; 2928c2ecf20Sopenharmony_ci unsigned len; 2938c2ecf20Sopenharmony_ci void *src; 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci desc->tfm = tfm; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci crypto_shash_init(desc); 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci src = kmap_atomic(page); 3008c2ecf20Sopenharmony_ci while ((tmp = page_chain_next(page))) { 3018c2ecf20Sopenharmony_ci /* all but the last page will be fully used */ 3028c2ecf20Sopenharmony_ci crypto_shash_update(desc, src, PAGE_SIZE); 3038c2ecf20Sopenharmony_ci kunmap_atomic(src); 3048c2ecf20Sopenharmony_ci page = tmp; 3058c2ecf20Sopenharmony_ci src = kmap_atomic(page); 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci /* and now the last, possibly only partially used page */ 3088c2ecf20Sopenharmony_ci len = peer_req->i.size & (PAGE_SIZE - 1); 3098c2ecf20Sopenharmony_ci crypto_shash_update(desc, src, len ?: PAGE_SIZE); 3108c2ecf20Sopenharmony_ci kunmap_atomic(src); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci crypto_shash_final(desc, digest); 3138c2ecf20Sopenharmony_ci shash_desc_zero(desc); 3148c2ecf20Sopenharmony_ci} 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_civoid drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci SHASH_DESC_ON_STACK(desc, tfm); 3198c2ecf20Sopenharmony_ci struct bio_vec bvec; 3208c2ecf20Sopenharmony_ci struct bvec_iter iter; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci desc->tfm = tfm; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci crypto_shash_init(desc); 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 3278c2ecf20Sopenharmony_ci u8 *src; 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci src = kmap_atomic(bvec.bv_page); 3308c2ecf20Sopenharmony_ci crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len); 3318c2ecf20Sopenharmony_ci kunmap_atomic(src); 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci /* REQ_OP_WRITE_SAME has only one segment, 3348c2ecf20Sopenharmony_ci * checksum the payload only once. */ 3358c2ecf20Sopenharmony_ci if (bio_op(bio) == REQ_OP_WRITE_SAME) 3368c2ecf20Sopenharmony_ci break; 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci crypto_shash_final(desc, digest); 3398c2ecf20Sopenharmony_ci shash_desc_zero(desc); 3408c2ecf20Sopenharmony_ci} 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci/* MAYBE merge common code with w_e_end_ov_req */ 3438c2ecf20Sopenharmony_cistatic int w_e_send_csum(struct drbd_work *w, int cancel) 3448c2ecf20Sopenharmony_ci{ 3458c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 3468c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 3478c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 3488c2ecf20Sopenharmony_ci int digest_size; 3498c2ecf20Sopenharmony_ci void *digest; 3508c2ecf20Sopenharmony_ci int err = 0; 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci if (unlikely(cancel)) 3538c2ecf20Sopenharmony_ci goto out; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) 3568c2ecf20Sopenharmony_ci goto out; 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); 3598c2ecf20Sopenharmony_ci digest = kmalloc(digest_size, GFP_NOIO); 3608c2ecf20Sopenharmony_ci if (digest) { 3618c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 3628c2ecf20Sopenharmony_ci unsigned int size = peer_req->i.size; 3638c2ecf20Sopenharmony_ci drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 3648c2ecf20Sopenharmony_ci /* Free peer_req and pages before send. 3658c2ecf20Sopenharmony_ci * In case we block on congestion, we could otherwise run into 3668c2ecf20Sopenharmony_ci * some distributed deadlock, if the other side blocks on 3678c2ecf20Sopenharmony_ci * congestion as well, because our receiver blocks in 3688c2ecf20Sopenharmony_ci * drbd_alloc_pages due to pp_in_use > max_buffers. */ 3698c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 3708c2ecf20Sopenharmony_ci peer_req = NULL; 3718c2ecf20Sopenharmony_ci inc_rs_pending(device); 3728c2ecf20Sopenharmony_ci err = drbd_send_drequest_csum(peer_device, sector, size, 3738c2ecf20Sopenharmony_ci digest, digest_size, 3748c2ecf20Sopenharmony_ci P_CSUM_RS_REQUEST); 3758c2ecf20Sopenharmony_ci kfree(digest); 3768c2ecf20Sopenharmony_ci } else { 3778c2ecf20Sopenharmony_ci drbd_err(device, "kmalloc() of digest failed.\n"); 3788c2ecf20Sopenharmony_ci err = -ENOMEM; 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ciout: 3828c2ecf20Sopenharmony_ci if (peer_req) 3838c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci if (unlikely(err)) 3868c2ecf20Sopenharmony_ci drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); 3878c2ecf20Sopenharmony_ci return err; 3888c2ecf20Sopenharmony_ci} 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_cistatic int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) 3938c2ecf20Sopenharmony_ci{ 3948c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 3958c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci if (!get_ldev(device)) 3988c2ecf20Sopenharmony_ci return -EIO; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci /* GFP_TRY, because if there is no memory available right now, this may 4018c2ecf20Sopenharmony_ci * be rescheduled for later. It is "only" background resync, after all. */ 4028c2ecf20Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 4038c2ecf20Sopenharmony_ci size, size, GFP_TRY); 4048c2ecf20Sopenharmony_ci if (!peer_req) 4058c2ecf20Sopenharmony_ci goto defer; 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_send_csum; 4088c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4098c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->read_ee); 4108c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_ev); 4138c2ecf20Sopenharmony_ci if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, 4148c2ecf20Sopenharmony_ci DRBD_FAULT_RS_RD) == 0) 4158c2ecf20Sopenharmony_ci return 0; 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci /* If it failed because of ENOMEM, retry should help. If it failed 4188c2ecf20Sopenharmony_ci * because bio_add_page failed (probably broken lower level driver), 4198c2ecf20Sopenharmony_ci * retry may or may not help. 4208c2ecf20Sopenharmony_ci * If it does not, you may need to force disconnect. */ 4218c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4228c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 4238c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 4268c2ecf20Sopenharmony_cidefer: 4278c2ecf20Sopenharmony_ci put_ldev(device); 4288c2ecf20Sopenharmony_ci return -EAGAIN; 4298c2ecf20Sopenharmony_ci} 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ciint w_resync_timer(struct drbd_work *w, int cancel) 4328c2ecf20Sopenharmony_ci{ 4338c2ecf20Sopenharmony_ci struct drbd_device *device = 4348c2ecf20Sopenharmony_ci container_of(w, struct drbd_device, resync_work); 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci switch (device->state.conn) { 4378c2ecf20Sopenharmony_ci case C_VERIFY_S: 4388c2ecf20Sopenharmony_ci make_ov_request(device, cancel); 4398c2ecf20Sopenharmony_ci break; 4408c2ecf20Sopenharmony_ci case C_SYNC_TARGET: 4418c2ecf20Sopenharmony_ci make_resync_request(device, cancel); 4428c2ecf20Sopenharmony_ci break; 4438c2ecf20Sopenharmony_ci } 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci return 0; 4468c2ecf20Sopenharmony_ci} 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_civoid resync_timer_fn(struct timer_list *t) 4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci struct drbd_device *device = from_timer(device, t, resync_timer); 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci drbd_queue_work_if_unqueued( 4538c2ecf20Sopenharmony_ci &first_peer_device(device)->connection->sender_work, 4548c2ecf20Sopenharmony_ci &device->resync_work); 4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_cistatic void fifo_set(struct fifo_buffer *fb, int value) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci int i; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci for (i = 0; i < fb->size; i++) 4628c2ecf20Sopenharmony_ci fb->values[i] = value; 4638c2ecf20Sopenharmony_ci} 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_cistatic int fifo_push(struct fifo_buffer *fb, int value) 4668c2ecf20Sopenharmony_ci{ 4678c2ecf20Sopenharmony_ci int ov; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci ov = fb->values[fb->head_index]; 4708c2ecf20Sopenharmony_ci fb->values[fb->head_index++] = value; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci if (fb->head_index >= fb->size) 4738c2ecf20Sopenharmony_ci fb->head_index = 0; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci return ov; 4768c2ecf20Sopenharmony_ci} 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_cistatic void fifo_add_val(struct fifo_buffer *fb, int value) 4798c2ecf20Sopenharmony_ci{ 4808c2ecf20Sopenharmony_ci int i; 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci for (i = 0; i < fb->size; i++) 4838c2ecf20Sopenharmony_ci fb->values[i] += value; 4848c2ecf20Sopenharmony_ci} 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_cistruct fifo_buffer *fifo_alloc(unsigned int fifo_size) 4878c2ecf20Sopenharmony_ci{ 4888c2ecf20Sopenharmony_ci struct fifo_buffer *fb; 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_ci fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO); 4918c2ecf20Sopenharmony_ci if (!fb) 4928c2ecf20Sopenharmony_ci return NULL; 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci fb->head_index = 0; 4958c2ecf20Sopenharmony_ci fb->size = fifo_size; 4968c2ecf20Sopenharmony_ci fb->total = 0; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci return fb; 4998c2ecf20Sopenharmony_ci} 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_cistatic int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 5028c2ecf20Sopenharmony_ci{ 5038c2ecf20Sopenharmony_ci struct disk_conf *dc; 5048c2ecf20Sopenharmony_ci unsigned int want; /* The number of sectors we want in-flight */ 5058c2ecf20Sopenharmony_ci int req_sect; /* Number of sectors to request in this turn */ 5068c2ecf20Sopenharmony_ci int correction; /* Number of sectors more we need in-flight */ 5078c2ecf20Sopenharmony_ci int cps; /* correction per invocation of drbd_rs_controller() */ 5088c2ecf20Sopenharmony_ci int steps; /* Number of time steps to plan ahead */ 5098c2ecf20Sopenharmony_ci int curr_corr; 5108c2ecf20Sopenharmony_ci int max_sect; 5118c2ecf20Sopenharmony_ci struct fifo_buffer *plan; 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci dc = rcu_dereference(device->ldev->disk_conf); 5148c2ecf20Sopenharmony_ci plan = rcu_dereference(device->rs_plan_s); 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci if (device->rs_in_flight + sect_in == 0) { /* At start of resync */ 5198c2ecf20Sopenharmony_ci want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; 5208c2ecf20Sopenharmony_ci } else { /* normal path */ 5218c2ecf20Sopenharmony_ci want = dc->c_fill_target ? dc->c_fill_target : 5228c2ecf20Sopenharmony_ci sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); 5238c2ecf20Sopenharmony_ci } 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci correction = want - device->rs_in_flight - plan->total; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci /* Plan ahead */ 5288c2ecf20Sopenharmony_ci cps = correction / steps; 5298c2ecf20Sopenharmony_ci fifo_add_val(plan, cps); 5308c2ecf20Sopenharmony_ci plan->total += cps * steps; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci /* What we do in this step */ 5338c2ecf20Sopenharmony_ci curr_corr = fifo_push(plan, 0); 5348c2ecf20Sopenharmony_ci plan->total -= curr_corr; 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci req_sect = sect_in + curr_corr; 5378c2ecf20Sopenharmony_ci if (req_sect < 0) 5388c2ecf20Sopenharmony_ci req_sect = 0; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ; 5418c2ecf20Sopenharmony_ci if (req_sect > max_sect) 5428c2ecf20Sopenharmony_ci req_sect = max_sect; 5438c2ecf20Sopenharmony_ci 5448c2ecf20Sopenharmony_ci /* 5458c2ecf20Sopenharmony_ci drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", 5468c2ecf20Sopenharmony_ci sect_in, device->rs_in_flight, want, correction, 5478c2ecf20Sopenharmony_ci steps, cps, device->rs_planed, curr_corr, req_sect); 5488c2ecf20Sopenharmony_ci */ 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci return req_sect; 5518c2ecf20Sopenharmony_ci} 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_cistatic int drbd_rs_number_requests(struct drbd_device *device) 5548c2ecf20Sopenharmony_ci{ 5558c2ecf20Sopenharmony_ci unsigned int sect_in; /* Number of sectors that came in since the last turn */ 5568c2ecf20Sopenharmony_ci int number, mxb; 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_ci sect_in = atomic_xchg(&device->rs_sect_in, 0); 5598c2ecf20Sopenharmony_ci device->rs_in_flight -= sect_in; 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci rcu_read_lock(); 5628c2ecf20Sopenharmony_ci mxb = drbd_get_max_buffers(device) / 2; 5638c2ecf20Sopenharmony_ci if (rcu_dereference(device->rs_plan_s)->size) { 5648c2ecf20Sopenharmony_ci number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); 5658c2ecf20Sopenharmony_ci device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; 5668c2ecf20Sopenharmony_ci } else { 5678c2ecf20Sopenharmony_ci device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; 5688c2ecf20Sopenharmony_ci number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); 5698c2ecf20Sopenharmony_ci } 5708c2ecf20Sopenharmony_ci rcu_read_unlock(); 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_ci /* Don't have more than "max-buffers"/2 in-flight. 5738c2ecf20Sopenharmony_ci * Otherwise we may cause the remote site to stall on drbd_alloc_pages(), 5748c2ecf20Sopenharmony_ci * potentially causing a distributed deadlock on congestion during 5758c2ecf20Sopenharmony_ci * online-verify or (checksum-based) resync, if max-buffers, 5768c2ecf20Sopenharmony_ci * socket buffer sizes and resync rate settings are mis-configured. */ 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k), 5798c2ecf20Sopenharmony_ci * mxb (as used here, and in drbd_alloc_pages on the peer) is 5808c2ecf20Sopenharmony_ci * "number of pages" (typically also 4k), 5818c2ecf20Sopenharmony_ci * but "rs_in_flight" is in "sectors" (512 Byte). */ 5828c2ecf20Sopenharmony_ci if (mxb - device->rs_in_flight/8 < number) 5838c2ecf20Sopenharmony_ci number = mxb - device->rs_in_flight/8; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci return number; 5868c2ecf20Sopenharmony_ci} 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_cistatic int make_resync_request(struct drbd_device *const device, int cancel) 5898c2ecf20Sopenharmony_ci{ 5908c2ecf20Sopenharmony_ci struct drbd_peer_device *const peer_device = first_peer_device(device); 5918c2ecf20Sopenharmony_ci struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; 5928c2ecf20Sopenharmony_ci unsigned long bit; 5938c2ecf20Sopenharmony_ci sector_t sector; 5948c2ecf20Sopenharmony_ci const sector_t capacity = get_capacity(device->vdisk); 5958c2ecf20Sopenharmony_ci int max_bio_size; 5968c2ecf20Sopenharmony_ci int number, rollback_i, size; 5978c2ecf20Sopenharmony_ci int align, requeue = 0; 5988c2ecf20Sopenharmony_ci int i = 0; 5998c2ecf20Sopenharmony_ci int discard_granularity = 0; 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci if (unlikely(cancel)) 6028c2ecf20Sopenharmony_ci return 0; 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci if (device->rs_total == 0) { 6058c2ecf20Sopenharmony_ci /* empty resync? */ 6068c2ecf20Sopenharmony_ci drbd_resync_finished(device); 6078c2ecf20Sopenharmony_ci return 0; 6088c2ecf20Sopenharmony_ci } 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_ci if (!get_ldev(device)) { 6118c2ecf20Sopenharmony_ci /* Since we only need to access device->rsync a 6128c2ecf20Sopenharmony_ci get_ldev_if_state(device,D_FAILED) would be sufficient, but 6138c2ecf20Sopenharmony_ci to continue resync with a broken disk makes no sense at 6148c2ecf20Sopenharmony_ci all */ 6158c2ecf20Sopenharmony_ci drbd_err(device, "Disk broke down during resync!\n"); 6168c2ecf20Sopenharmony_ci return 0; 6178c2ecf20Sopenharmony_ci } 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci if (connection->agreed_features & DRBD_FF_THIN_RESYNC) { 6208c2ecf20Sopenharmony_ci rcu_read_lock(); 6218c2ecf20Sopenharmony_ci discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity; 6228c2ecf20Sopenharmony_ci rcu_read_unlock(); 6238c2ecf20Sopenharmony_ci } 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; 6268c2ecf20Sopenharmony_ci number = drbd_rs_number_requests(device); 6278c2ecf20Sopenharmony_ci if (number <= 0) 6288c2ecf20Sopenharmony_ci goto requeue; 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci for (i = 0; i < number; i++) { 6318c2ecf20Sopenharmony_ci /* Stop generating RS requests when half of the send buffer is filled, 6328c2ecf20Sopenharmony_ci * but notify TCP that we'd like to have more space. */ 6338c2ecf20Sopenharmony_ci mutex_lock(&connection->data.mutex); 6348c2ecf20Sopenharmony_ci if (connection->data.socket) { 6358c2ecf20Sopenharmony_ci struct sock *sk = connection->data.socket->sk; 6368c2ecf20Sopenharmony_ci int queued = sk->sk_wmem_queued; 6378c2ecf20Sopenharmony_ci int sndbuf = sk->sk_sndbuf; 6388c2ecf20Sopenharmony_ci if (queued > sndbuf / 2) { 6398c2ecf20Sopenharmony_ci requeue = 1; 6408c2ecf20Sopenharmony_ci if (sk->sk_socket) 6418c2ecf20Sopenharmony_ci set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 6428c2ecf20Sopenharmony_ci } 6438c2ecf20Sopenharmony_ci } else 6448c2ecf20Sopenharmony_ci requeue = 1; 6458c2ecf20Sopenharmony_ci mutex_unlock(&connection->data.mutex); 6468c2ecf20Sopenharmony_ci if (requeue) 6478c2ecf20Sopenharmony_ci goto requeue; 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_cinext_sector: 6508c2ecf20Sopenharmony_ci size = BM_BLOCK_SIZE; 6518c2ecf20Sopenharmony_ci bit = drbd_bm_find_next(device, device->bm_resync_fo); 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci if (bit == DRBD_END_OF_BITMAP) { 6548c2ecf20Sopenharmony_ci device->bm_resync_fo = drbd_bm_bits(device); 6558c2ecf20Sopenharmony_ci put_ldev(device); 6568c2ecf20Sopenharmony_ci return 0; 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci sector = BM_BIT_TO_SECT(bit); 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci if (drbd_try_rs_begin_io(device, sector)) { 6628c2ecf20Sopenharmony_ci device->bm_resync_fo = bit; 6638c2ecf20Sopenharmony_ci goto requeue; 6648c2ecf20Sopenharmony_ci } 6658c2ecf20Sopenharmony_ci device->bm_resync_fo = bit + 1; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci if (unlikely(drbd_bm_test_bit(device, bit) == 0)) { 6688c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 6698c2ecf20Sopenharmony_ci goto next_sector; 6708c2ecf20Sopenharmony_ci } 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE 6738c2ecf20Sopenharmony_ci /* try to find some adjacent bits. 6748c2ecf20Sopenharmony_ci * we stop if we have already the maximum req size. 6758c2ecf20Sopenharmony_ci * 6768c2ecf20Sopenharmony_ci * Additionally always align bigger requests, in order to 6778c2ecf20Sopenharmony_ci * be prepared for all stripe sizes of software RAIDs. 6788c2ecf20Sopenharmony_ci */ 6798c2ecf20Sopenharmony_ci align = 1; 6808c2ecf20Sopenharmony_ci rollback_i = i; 6818c2ecf20Sopenharmony_ci while (i < number) { 6828c2ecf20Sopenharmony_ci if (size + BM_BLOCK_SIZE > max_bio_size) 6838c2ecf20Sopenharmony_ci break; 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci /* Be always aligned */ 6868c2ecf20Sopenharmony_ci if (sector & ((1<<(align+3))-1)) 6878c2ecf20Sopenharmony_ci break; 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci if (discard_granularity && size == discard_granularity) 6908c2ecf20Sopenharmony_ci break; 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci /* do not cross extent boundaries */ 6938c2ecf20Sopenharmony_ci if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) 6948c2ecf20Sopenharmony_ci break; 6958c2ecf20Sopenharmony_ci /* now, is it actually dirty, after all? 6968c2ecf20Sopenharmony_ci * caution, drbd_bm_test_bit is tri-state for some 6978c2ecf20Sopenharmony_ci * obscure reason; ( b == 0 ) would get the out-of-band 6988c2ecf20Sopenharmony_ci * only accidentally right because of the "oddly sized" 6998c2ecf20Sopenharmony_ci * adjustment below */ 7008c2ecf20Sopenharmony_ci if (drbd_bm_test_bit(device, bit+1) != 1) 7018c2ecf20Sopenharmony_ci break; 7028c2ecf20Sopenharmony_ci bit++; 7038c2ecf20Sopenharmony_ci size += BM_BLOCK_SIZE; 7048c2ecf20Sopenharmony_ci if ((BM_BLOCK_SIZE << align) <= size) 7058c2ecf20Sopenharmony_ci align++; 7068c2ecf20Sopenharmony_ci i++; 7078c2ecf20Sopenharmony_ci } 7088c2ecf20Sopenharmony_ci /* if we merged some, 7098c2ecf20Sopenharmony_ci * reset the offset to start the next drbd_bm_find_next from */ 7108c2ecf20Sopenharmony_ci if (size > BM_BLOCK_SIZE) 7118c2ecf20Sopenharmony_ci device->bm_resync_fo = bit + 1; 7128c2ecf20Sopenharmony_ci#endif 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci /* adjust very last sectors, in case we are oddly sized */ 7158c2ecf20Sopenharmony_ci if (sector + (size>>9) > capacity) 7168c2ecf20Sopenharmony_ci size = (capacity-sector)<<9; 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci if (device->use_csums) { 7198c2ecf20Sopenharmony_ci switch (read_for_csum(peer_device, sector, size)) { 7208c2ecf20Sopenharmony_ci case -EIO: /* Disk failure */ 7218c2ecf20Sopenharmony_ci put_ldev(device); 7228c2ecf20Sopenharmony_ci return -EIO; 7238c2ecf20Sopenharmony_ci case -EAGAIN: /* allocation failed, or ldev busy */ 7248c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 7258c2ecf20Sopenharmony_ci device->bm_resync_fo = BM_SECT_TO_BIT(sector); 7268c2ecf20Sopenharmony_ci i = rollback_i; 7278c2ecf20Sopenharmony_ci goto requeue; 7288c2ecf20Sopenharmony_ci case 0: 7298c2ecf20Sopenharmony_ci /* everything ok */ 7308c2ecf20Sopenharmony_ci break; 7318c2ecf20Sopenharmony_ci default: 7328c2ecf20Sopenharmony_ci BUG(); 7338c2ecf20Sopenharmony_ci } 7348c2ecf20Sopenharmony_ci } else { 7358c2ecf20Sopenharmony_ci int err; 7368c2ecf20Sopenharmony_ci 7378c2ecf20Sopenharmony_ci inc_rs_pending(device); 7388c2ecf20Sopenharmony_ci err = drbd_send_drequest(peer_device, 7398c2ecf20Sopenharmony_ci size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, 7408c2ecf20Sopenharmony_ci sector, size, ID_SYNCER); 7418c2ecf20Sopenharmony_ci if (err) { 7428c2ecf20Sopenharmony_ci drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); 7438c2ecf20Sopenharmony_ci dec_rs_pending(device); 7448c2ecf20Sopenharmony_ci put_ldev(device); 7458c2ecf20Sopenharmony_ci return err; 7468c2ecf20Sopenharmony_ci } 7478c2ecf20Sopenharmony_ci } 7488c2ecf20Sopenharmony_ci } 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci if (device->bm_resync_fo >= drbd_bm_bits(device)) { 7518c2ecf20Sopenharmony_ci /* last syncer _request_ was sent, 7528c2ecf20Sopenharmony_ci * but the P_RS_DATA_REPLY not yet received. sync will end (and 7538c2ecf20Sopenharmony_ci * next sync group will resume), as soon as we receive the last 7548c2ecf20Sopenharmony_ci * resync data block, and the last bit is cleared. 7558c2ecf20Sopenharmony_ci * until then resync "work" is "inactive" ... 7568c2ecf20Sopenharmony_ci */ 7578c2ecf20Sopenharmony_ci put_ldev(device); 7588c2ecf20Sopenharmony_ci return 0; 7598c2ecf20Sopenharmony_ci } 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci requeue: 7628c2ecf20Sopenharmony_ci device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 7638c2ecf20Sopenharmony_ci mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 7648c2ecf20Sopenharmony_ci put_ldev(device); 7658c2ecf20Sopenharmony_ci return 0; 7668c2ecf20Sopenharmony_ci} 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_cistatic int make_ov_request(struct drbd_device *device, int cancel) 7698c2ecf20Sopenharmony_ci{ 7708c2ecf20Sopenharmony_ci int number, i, size; 7718c2ecf20Sopenharmony_ci sector_t sector; 7728c2ecf20Sopenharmony_ci const sector_t capacity = get_capacity(device->vdisk); 7738c2ecf20Sopenharmony_ci bool stop_sector_reached = false; 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci if (unlikely(cancel)) 7768c2ecf20Sopenharmony_ci return 1; 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci number = drbd_rs_number_requests(device); 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci sector = device->ov_position; 7818c2ecf20Sopenharmony_ci for (i = 0; i < number; i++) { 7828c2ecf20Sopenharmony_ci if (sector >= capacity) 7838c2ecf20Sopenharmony_ci return 1; 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci /* We check for "finished" only in the reply path: 7868c2ecf20Sopenharmony_ci * w_e_end_ov_reply(). 7878c2ecf20Sopenharmony_ci * We need to send at least one request out. */ 7888c2ecf20Sopenharmony_ci stop_sector_reached = i > 0 7898c2ecf20Sopenharmony_ci && verify_can_do_stop_sector(device) 7908c2ecf20Sopenharmony_ci && sector >= device->ov_stop_sector; 7918c2ecf20Sopenharmony_ci if (stop_sector_reached) 7928c2ecf20Sopenharmony_ci break; 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci size = BM_BLOCK_SIZE; 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci if (drbd_try_rs_begin_io(device, sector)) { 7978c2ecf20Sopenharmony_ci device->ov_position = sector; 7988c2ecf20Sopenharmony_ci goto requeue; 7998c2ecf20Sopenharmony_ci } 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci if (sector + (size>>9) > capacity) 8028c2ecf20Sopenharmony_ci size = (capacity-sector)<<9; 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci inc_rs_pending(device); 8058c2ecf20Sopenharmony_ci if (drbd_send_ov_request(first_peer_device(device), sector, size)) { 8068c2ecf20Sopenharmony_ci dec_rs_pending(device); 8078c2ecf20Sopenharmony_ci return 0; 8088c2ecf20Sopenharmony_ci } 8098c2ecf20Sopenharmony_ci sector += BM_SECT_PER_BIT; 8108c2ecf20Sopenharmony_ci } 8118c2ecf20Sopenharmony_ci device->ov_position = sector; 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci requeue: 8148c2ecf20Sopenharmony_ci device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 8158c2ecf20Sopenharmony_ci if (i == 0 || !stop_sector_reached) 8168c2ecf20Sopenharmony_ci mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 8178c2ecf20Sopenharmony_ci return 1; 8188c2ecf20Sopenharmony_ci} 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ciint w_ov_finished(struct drbd_work *w, int cancel) 8218c2ecf20Sopenharmony_ci{ 8228c2ecf20Sopenharmony_ci struct drbd_device_work *dw = 8238c2ecf20Sopenharmony_ci container_of(w, struct drbd_device_work, w); 8248c2ecf20Sopenharmony_ci struct drbd_device *device = dw->device; 8258c2ecf20Sopenharmony_ci kfree(dw); 8268c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 8278c2ecf20Sopenharmony_ci drbd_resync_finished(device); 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci return 0; 8308c2ecf20Sopenharmony_ci} 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_cistatic int w_resync_finished(struct drbd_work *w, int cancel) 8338c2ecf20Sopenharmony_ci{ 8348c2ecf20Sopenharmony_ci struct drbd_device_work *dw = 8358c2ecf20Sopenharmony_ci container_of(w, struct drbd_device_work, w); 8368c2ecf20Sopenharmony_ci struct drbd_device *device = dw->device; 8378c2ecf20Sopenharmony_ci kfree(dw); 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_ci drbd_resync_finished(device); 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci return 0; 8428c2ecf20Sopenharmony_ci} 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_cistatic void ping_peer(struct drbd_device *device) 8458c2ecf20Sopenharmony_ci{ 8468c2ecf20Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci clear_bit(GOT_PING_ACK, &connection->flags); 8498c2ecf20Sopenharmony_ci request_ping(connection); 8508c2ecf20Sopenharmony_ci wait_event(connection->ping_wait, 8518c2ecf20Sopenharmony_ci test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); 8528c2ecf20Sopenharmony_ci} 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ciint drbd_resync_finished(struct drbd_device *device) 8558c2ecf20Sopenharmony_ci{ 8568c2ecf20Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 8578c2ecf20Sopenharmony_ci unsigned long db, dt, dbdt; 8588c2ecf20Sopenharmony_ci unsigned long n_oos; 8598c2ecf20Sopenharmony_ci union drbd_state os, ns; 8608c2ecf20Sopenharmony_ci struct drbd_device_work *dw; 8618c2ecf20Sopenharmony_ci char *khelper_cmd = NULL; 8628c2ecf20Sopenharmony_ci int verify_done = 0; 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci /* Remove all elements from the resync LRU. Since future actions 8658c2ecf20Sopenharmony_ci * might set bits in the (main) bitmap, then the entries in the 8668c2ecf20Sopenharmony_ci * resync LRU would be wrong. */ 8678c2ecf20Sopenharmony_ci if (drbd_rs_del_all(device)) { 8688c2ecf20Sopenharmony_ci /* In case this is not possible now, most probably because 8698c2ecf20Sopenharmony_ci * there are P_RS_DATA_REPLY Packets lingering on the worker's 8708c2ecf20Sopenharmony_ci * queue (or even the read operations for those packets 8718c2ecf20Sopenharmony_ci * is not finished by now). Retry in 100ms. */ 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci schedule_timeout_interruptible(HZ / 10); 8748c2ecf20Sopenharmony_ci dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC); 8758c2ecf20Sopenharmony_ci if (dw) { 8768c2ecf20Sopenharmony_ci dw->w.cb = w_resync_finished; 8778c2ecf20Sopenharmony_ci dw->device = device; 8788c2ecf20Sopenharmony_ci drbd_queue_work(&connection->sender_work, &dw->w); 8798c2ecf20Sopenharmony_ci return 1; 8808c2ecf20Sopenharmony_ci } 8818c2ecf20Sopenharmony_ci drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); 8828c2ecf20Sopenharmony_ci } 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci dt = (jiffies - device->rs_start - device->rs_paused) / HZ; 8858c2ecf20Sopenharmony_ci if (dt <= 0) 8868c2ecf20Sopenharmony_ci dt = 1; 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci db = device->rs_total; 8898c2ecf20Sopenharmony_ci /* adjust for verify start and stop sectors, respective reached position */ 8908c2ecf20Sopenharmony_ci if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 8918c2ecf20Sopenharmony_ci db -= device->ov_left; 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci dbdt = Bit2KB(db/dt); 8948c2ecf20Sopenharmony_ci device->rs_paused /= HZ; 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci if (!get_ldev(device)) 8978c2ecf20Sopenharmony_ci goto out; 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci ping_peer(device); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 9028c2ecf20Sopenharmony_ci os = drbd_read_state(device); 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci /* This protects us against multiple calls (that can happen in the presence 9078c2ecf20Sopenharmony_ci of application IO), and against connectivity loss just before we arrive here. */ 9088c2ecf20Sopenharmony_ci if (os.conn <= C_CONNECTED) 9098c2ecf20Sopenharmony_ci goto out_unlock; 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ci ns = os; 9128c2ecf20Sopenharmony_ci ns.conn = C_CONNECTED; 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_ci drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 9158c2ecf20Sopenharmony_ci verify_done ? "Online verify" : "Resync", 9168c2ecf20Sopenharmony_ci dt + device->rs_paused, device->rs_paused, dbdt); 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci n_oos = drbd_bm_total_weight(device); 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { 9218c2ecf20Sopenharmony_ci if (n_oos) { 9228c2ecf20Sopenharmony_ci drbd_alert(device, "Online verify found %lu %dk block out of sync!\n", 9238c2ecf20Sopenharmony_ci n_oos, Bit2KB(1)); 9248c2ecf20Sopenharmony_ci khelper_cmd = "out-of-sync"; 9258c2ecf20Sopenharmony_ci } 9268c2ecf20Sopenharmony_ci } else { 9278c2ecf20Sopenharmony_ci D_ASSERT(device, (n_oos - device->rs_failed) == 0); 9288c2ecf20Sopenharmony_ci 9298c2ecf20Sopenharmony_ci if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) 9308c2ecf20Sopenharmony_ci khelper_cmd = "after-resync-target"; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci if (device->use_csums && device->rs_total) { 9338c2ecf20Sopenharmony_ci const unsigned long s = device->rs_same_csum; 9348c2ecf20Sopenharmony_ci const unsigned long t = device->rs_total; 9358c2ecf20Sopenharmony_ci const int ratio = 9368c2ecf20Sopenharmony_ci (t == 0) ? 0 : 9378c2ecf20Sopenharmony_ci (t < 100000) ? ((s*100)/t) : (s/(t/100)); 9388c2ecf20Sopenharmony_ci drbd_info(device, "%u %% had equal checksums, eliminated: %luK; " 9398c2ecf20Sopenharmony_ci "transferred %luK total %luK\n", 9408c2ecf20Sopenharmony_ci ratio, 9418c2ecf20Sopenharmony_ci Bit2KB(device->rs_same_csum), 9428c2ecf20Sopenharmony_ci Bit2KB(device->rs_total - device->rs_same_csum), 9438c2ecf20Sopenharmony_ci Bit2KB(device->rs_total)); 9448c2ecf20Sopenharmony_ci } 9458c2ecf20Sopenharmony_ci } 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci if (device->rs_failed) { 9488c2ecf20Sopenharmony_ci drbd_info(device, " %lu failed blocks\n", device->rs_failed); 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 9518c2ecf20Sopenharmony_ci ns.disk = D_INCONSISTENT; 9528c2ecf20Sopenharmony_ci ns.pdsk = D_UP_TO_DATE; 9538c2ecf20Sopenharmony_ci } else { 9548c2ecf20Sopenharmony_ci ns.disk = D_UP_TO_DATE; 9558c2ecf20Sopenharmony_ci ns.pdsk = D_INCONSISTENT; 9568c2ecf20Sopenharmony_ci } 9578c2ecf20Sopenharmony_ci } else { 9588c2ecf20Sopenharmony_ci ns.disk = D_UP_TO_DATE; 9598c2ecf20Sopenharmony_ci ns.pdsk = D_UP_TO_DATE; 9608c2ecf20Sopenharmony_ci 9618c2ecf20Sopenharmony_ci if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 9628c2ecf20Sopenharmony_ci if (device->p_uuid) { 9638c2ecf20Sopenharmony_ci int i; 9648c2ecf20Sopenharmony_ci for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) 9658c2ecf20Sopenharmony_ci _drbd_uuid_set(device, i, device->p_uuid[i]); 9668c2ecf20Sopenharmony_ci drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]); 9678c2ecf20Sopenharmony_ci _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]); 9688c2ecf20Sopenharmony_ci } else { 9698c2ecf20Sopenharmony_ci drbd_err(device, "device->p_uuid is NULL! BUG\n"); 9708c2ecf20Sopenharmony_ci } 9718c2ecf20Sopenharmony_ci } 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { 9748c2ecf20Sopenharmony_ci /* for verify runs, we don't update uuids here, 9758c2ecf20Sopenharmony_ci * so there would be nothing to report. */ 9768c2ecf20Sopenharmony_ci drbd_uuid_set_bm(device, 0UL); 9778c2ecf20Sopenharmony_ci drbd_print_uuids(device, "updated UUIDs"); 9788c2ecf20Sopenharmony_ci if (device->p_uuid) { 9798c2ecf20Sopenharmony_ci /* Now the two UUID sets are equal, update what we 9808c2ecf20Sopenharmony_ci * know of the peer. */ 9818c2ecf20Sopenharmony_ci int i; 9828c2ecf20Sopenharmony_ci for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) 9838c2ecf20Sopenharmony_ci device->p_uuid[i] = device->ldev->md.uuid[i]; 9848c2ecf20Sopenharmony_ci } 9858c2ecf20Sopenharmony_ci } 9868c2ecf20Sopenharmony_ci } 9878c2ecf20Sopenharmony_ci 9888c2ecf20Sopenharmony_ci _drbd_set_state(device, ns, CS_VERBOSE, NULL); 9898c2ecf20Sopenharmony_ciout_unlock: 9908c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci /* If we have been sync source, and have an effective fencing-policy, 9938c2ecf20Sopenharmony_ci * once *all* volumes are back in sync, call "unfence". */ 9948c2ecf20Sopenharmony_ci if (os.conn == C_SYNC_SOURCE) { 9958c2ecf20Sopenharmony_ci enum drbd_disk_state disk_state = D_MASK; 9968c2ecf20Sopenharmony_ci enum drbd_disk_state pdsk_state = D_MASK; 9978c2ecf20Sopenharmony_ci enum drbd_fencing_p fp = FP_DONT_CARE; 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ci rcu_read_lock(); 10008c2ecf20Sopenharmony_ci fp = rcu_dereference(device->ldev->disk_conf)->fencing; 10018c2ecf20Sopenharmony_ci if (fp != FP_DONT_CARE) { 10028c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 10038c2ecf20Sopenharmony_ci int vnr; 10048c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 10058c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 10068c2ecf20Sopenharmony_ci disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk); 10078c2ecf20Sopenharmony_ci pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk); 10088c2ecf20Sopenharmony_ci } 10098c2ecf20Sopenharmony_ci } 10108c2ecf20Sopenharmony_ci rcu_read_unlock(); 10118c2ecf20Sopenharmony_ci if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE) 10128c2ecf20Sopenharmony_ci conn_khelper(connection, "unfence-peer"); 10138c2ecf20Sopenharmony_ci } 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_ci put_ldev(device); 10168c2ecf20Sopenharmony_ciout: 10178c2ecf20Sopenharmony_ci device->rs_total = 0; 10188c2ecf20Sopenharmony_ci device->rs_failed = 0; 10198c2ecf20Sopenharmony_ci device->rs_paused = 0; 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_ci /* reset start sector, if we reached end of device */ 10228c2ecf20Sopenharmony_ci if (verify_done && device->ov_left == 0) 10238c2ecf20Sopenharmony_ci device->ov_start_sector = 0; 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_ci drbd_md_sync(device); 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_ci if (khelper_cmd) 10288c2ecf20Sopenharmony_ci drbd_khelper(device, khelper_cmd); 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_ci return 1; 10318c2ecf20Sopenharmony_ci} 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_ci/* helper */ 10348c2ecf20Sopenharmony_cistatic void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) 10358c2ecf20Sopenharmony_ci{ 10368c2ecf20Sopenharmony_ci if (drbd_peer_req_has_active_page(peer_req)) { 10378c2ecf20Sopenharmony_ci /* This might happen if sendpage() has not finished */ 10388c2ecf20Sopenharmony_ci int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; 10398c2ecf20Sopenharmony_ci atomic_add(i, &device->pp_in_use_by_net); 10408c2ecf20Sopenharmony_ci atomic_sub(i, &device->pp_in_use); 10418c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 10428c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->net_ee); 10438c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 10448c2ecf20Sopenharmony_ci wake_up(&drbd_pp_wait); 10458c2ecf20Sopenharmony_ci } else 10468c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 10478c2ecf20Sopenharmony_ci} 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_ci/** 10508c2ecf20Sopenharmony_ci * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST 10518c2ecf20Sopenharmony_ci * @w: work object. 10528c2ecf20Sopenharmony_ci * @cancel: The connection will be closed anyways 10538c2ecf20Sopenharmony_ci */ 10548c2ecf20Sopenharmony_ciint w_e_end_data_req(struct drbd_work *w, int cancel) 10558c2ecf20Sopenharmony_ci{ 10568c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10578c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 10588c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 10598c2ecf20Sopenharmony_ci int err; 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 10628c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 10638c2ecf20Sopenharmony_ci dec_unacked(device); 10648c2ecf20Sopenharmony_ci return 0; 10658c2ecf20Sopenharmony_ci } 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 10688c2ecf20Sopenharmony_ci err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); 10698c2ecf20Sopenharmony_ci } else { 10708c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 10718c2ecf20Sopenharmony_ci drbd_err(device, "Sending NegDReply. sector=%llus.\n", 10728c2ecf20Sopenharmony_ci (unsigned long long)peer_req->i.sector); 10738c2ecf20Sopenharmony_ci 10748c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); 10758c2ecf20Sopenharmony_ci } 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci dec_unacked(device); 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci move_to_net_ee_or_free(device, peer_req); 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci if (unlikely(err)) 10828c2ecf20Sopenharmony_ci drbd_err(device, "drbd_send_block() failed\n"); 10838c2ecf20Sopenharmony_ci return err; 10848c2ecf20Sopenharmony_ci} 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_cistatic bool all_zero(struct drbd_peer_request *peer_req) 10878c2ecf20Sopenharmony_ci{ 10888c2ecf20Sopenharmony_ci struct page *page = peer_req->pages; 10898c2ecf20Sopenharmony_ci unsigned int len = peer_req->i.size; 10908c2ecf20Sopenharmony_ci 10918c2ecf20Sopenharmony_ci page_chain_for_each(page) { 10928c2ecf20Sopenharmony_ci unsigned int l = min_t(unsigned int, len, PAGE_SIZE); 10938c2ecf20Sopenharmony_ci unsigned int i, words = l / sizeof(long); 10948c2ecf20Sopenharmony_ci unsigned long *d; 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci d = kmap_atomic(page); 10978c2ecf20Sopenharmony_ci for (i = 0; i < words; i++) { 10988c2ecf20Sopenharmony_ci if (d[i]) { 10998c2ecf20Sopenharmony_ci kunmap_atomic(d); 11008c2ecf20Sopenharmony_ci return false; 11018c2ecf20Sopenharmony_ci } 11028c2ecf20Sopenharmony_ci } 11038c2ecf20Sopenharmony_ci kunmap_atomic(d); 11048c2ecf20Sopenharmony_ci len -= l; 11058c2ecf20Sopenharmony_ci } 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci return true; 11088c2ecf20Sopenharmony_ci} 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci/** 11118c2ecf20Sopenharmony_ci * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST 11128c2ecf20Sopenharmony_ci * @w: work object. 11138c2ecf20Sopenharmony_ci * @cancel: The connection will be closed anyways 11148c2ecf20Sopenharmony_ci */ 11158c2ecf20Sopenharmony_ciint w_e_end_rsdata_req(struct drbd_work *w, int cancel) 11168c2ecf20Sopenharmony_ci{ 11178c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11188c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 11198c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 11208c2ecf20Sopenharmony_ci int err; 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 11238c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 11248c2ecf20Sopenharmony_ci dec_unacked(device); 11258c2ecf20Sopenharmony_ci return 0; 11268c2ecf20Sopenharmony_ci } 11278c2ecf20Sopenharmony_ci 11288c2ecf20Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { 11298c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, peer_req->i.sector); 11308c2ecf20Sopenharmony_ci put_ldev(device); 11318c2ecf20Sopenharmony_ci } 11328c2ecf20Sopenharmony_ci 11338c2ecf20Sopenharmony_ci if (device->state.conn == C_AHEAD) { 11348c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); 11358c2ecf20Sopenharmony_ci } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 11368c2ecf20Sopenharmony_ci if (likely(device->state.pdsk >= D_INCONSISTENT)) { 11378c2ecf20Sopenharmony_ci inc_rs_pending(device); 11388c2ecf20Sopenharmony_ci if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) 11398c2ecf20Sopenharmony_ci err = drbd_send_rs_deallocated(peer_device, peer_req); 11408c2ecf20Sopenharmony_ci else 11418c2ecf20Sopenharmony_ci err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 11428c2ecf20Sopenharmony_ci } else { 11438c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 11448c2ecf20Sopenharmony_ci drbd_err(device, "Not sending RSDataReply, " 11458c2ecf20Sopenharmony_ci "partner DISKLESS!\n"); 11468c2ecf20Sopenharmony_ci err = 0; 11478c2ecf20Sopenharmony_ci } 11488c2ecf20Sopenharmony_ci } else { 11498c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 11508c2ecf20Sopenharmony_ci drbd_err(device, "Sending NegRSDReply. sector %llus.\n", 11518c2ecf20Sopenharmony_ci (unsigned long long)peer_req->i.sector); 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 11548c2ecf20Sopenharmony_ci 11558c2ecf20Sopenharmony_ci /* update resync data with failure */ 11568c2ecf20Sopenharmony_ci drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); 11578c2ecf20Sopenharmony_ci } 11588c2ecf20Sopenharmony_ci 11598c2ecf20Sopenharmony_ci dec_unacked(device); 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci move_to_net_ee_or_free(device, peer_req); 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_ci if (unlikely(err)) 11648c2ecf20Sopenharmony_ci drbd_err(device, "drbd_send_block() failed\n"); 11658c2ecf20Sopenharmony_ci return err; 11668c2ecf20Sopenharmony_ci} 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ciint w_e_end_csum_rs_req(struct drbd_work *w, int cancel) 11698c2ecf20Sopenharmony_ci{ 11708c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11718c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 11728c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 11738c2ecf20Sopenharmony_ci struct digest_info *di; 11748c2ecf20Sopenharmony_ci int digest_size; 11758c2ecf20Sopenharmony_ci void *digest = NULL; 11768c2ecf20Sopenharmony_ci int err, eq = 0; 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 11798c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 11808c2ecf20Sopenharmony_ci dec_unacked(device); 11818c2ecf20Sopenharmony_ci return 0; 11828c2ecf20Sopenharmony_ci } 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci if (get_ldev(device)) { 11858c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, peer_req->i.sector); 11868c2ecf20Sopenharmony_ci put_ldev(device); 11878c2ecf20Sopenharmony_ci } 11888c2ecf20Sopenharmony_ci 11898c2ecf20Sopenharmony_ci di = peer_req->digest; 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 11928c2ecf20Sopenharmony_ci /* quick hack to try to avoid a race against reconfiguration. 11938c2ecf20Sopenharmony_ci * a real fix would be much more involved, 11948c2ecf20Sopenharmony_ci * introducing more locking mechanisms */ 11958c2ecf20Sopenharmony_ci if (peer_device->connection->csums_tfm) { 11968c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); 11978c2ecf20Sopenharmony_ci D_ASSERT(device, digest_size == di->digest_size); 11988c2ecf20Sopenharmony_ci digest = kmalloc(digest_size, GFP_NOIO); 11998c2ecf20Sopenharmony_ci } 12008c2ecf20Sopenharmony_ci if (digest) { 12018c2ecf20Sopenharmony_ci drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 12028c2ecf20Sopenharmony_ci eq = !memcmp(digest, di->digest, digest_size); 12038c2ecf20Sopenharmony_ci kfree(digest); 12048c2ecf20Sopenharmony_ci } 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_ci if (eq) { 12078c2ecf20Sopenharmony_ci drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); 12088c2ecf20Sopenharmony_ci /* rs_same_csums unit is BM_BLOCK_SIZE */ 12098c2ecf20Sopenharmony_ci device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; 12108c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); 12118c2ecf20Sopenharmony_ci } else { 12128c2ecf20Sopenharmony_ci inc_rs_pending(device); 12138c2ecf20Sopenharmony_ci peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ 12148c2ecf20Sopenharmony_ci peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ 12158c2ecf20Sopenharmony_ci kfree(di); 12168c2ecf20Sopenharmony_ci err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 12178c2ecf20Sopenharmony_ci } 12188c2ecf20Sopenharmony_ci } else { 12198c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 12208c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 12218c2ecf20Sopenharmony_ci drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); 12228c2ecf20Sopenharmony_ci } 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci dec_unacked(device); 12258c2ecf20Sopenharmony_ci move_to_net_ee_or_free(device, peer_req); 12268c2ecf20Sopenharmony_ci 12278c2ecf20Sopenharmony_ci if (unlikely(err)) 12288c2ecf20Sopenharmony_ci drbd_err(device, "drbd_send_block/ack() failed\n"); 12298c2ecf20Sopenharmony_ci return err; 12308c2ecf20Sopenharmony_ci} 12318c2ecf20Sopenharmony_ci 12328c2ecf20Sopenharmony_ciint w_e_end_ov_req(struct drbd_work *w, int cancel) 12338c2ecf20Sopenharmony_ci{ 12348c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 12358c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 12368c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 12378c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 12388c2ecf20Sopenharmony_ci unsigned int size = peer_req->i.size; 12398c2ecf20Sopenharmony_ci int digest_size; 12408c2ecf20Sopenharmony_ci void *digest; 12418c2ecf20Sopenharmony_ci int err = 0; 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci if (unlikely(cancel)) 12448c2ecf20Sopenharmony_ci goto out; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); 12478c2ecf20Sopenharmony_ci digest = kmalloc(digest_size, GFP_NOIO); 12488c2ecf20Sopenharmony_ci if (!digest) { 12498c2ecf20Sopenharmony_ci err = 1; /* terminate the connection in case the allocation failed */ 12508c2ecf20Sopenharmony_ci goto out; 12518c2ecf20Sopenharmony_ci } 12528c2ecf20Sopenharmony_ci 12538c2ecf20Sopenharmony_ci if (likely(!(peer_req->flags & EE_WAS_ERROR))) 12548c2ecf20Sopenharmony_ci drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 12558c2ecf20Sopenharmony_ci else 12568c2ecf20Sopenharmony_ci memset(digest, 0, digest_size); 12578c2ecf20Sopenharmony_ci 12588c2ecf20Sopenharmony_ci /* Free e and pages before send. 12598c2ecf20Sopenharmony_ci * In case we block on congestion, we could otherwise run into 12608c2ecf20Sopenharmony_ci * some distributed deadlock, if the other side blocks on 12618c2ecf20Sopenharmony_ci * congestion as well, because our receiver blocks in 12628c2ecf20Sopenharmony_ci * drbd_alloc_pages due to pp_in_use > max_buffers. */ 12638c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 12648c2ecf20Sopenharmony_ci peer_req = NULL; 12658c2ecf20Sopenharmony_ci inc_rs_pending(device); 12668c2ecf20Sopenharmony_ci err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); 12678c2ecf20Sopenharmony_ci if (err) 12688c2ecf20Sopenharmony_ci dec_rs_pending(device); 12698c2ecf20Sopenharmony_ci kfree(digest); 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_ciout: 12728c2ecf20Sopenharmony_ci if (peer_req) 12738c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 12748c2ecf20Sopenharmony_ci dec_unacked(device); 12758c2ecf20Sopenharmony_ci return err; 12768c2ecf20Sopenharmony_ci} 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_civoid drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) 12798c2ecf20Sopenharmony_ci{ 12808c2ecf20Sopenharmony_ci if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { 12818c2ecf20Sopenharmony_ci device->ov_last_oos_size += size>>9; 12828c2ecf20Sopenharmony_ci } else { 12838c2ecf20Sopenharmony_ci device->ov_last_oos_start = sector; 12848c2ecf20Sopenharmony_ci device->ov_last_oos_size = size>>9; 12858c2ecf20Sopenharmony_ci } 12868c2ecf20Sopenharmony_ci drbd_set_out_of_sync(device, sector, size); 12878c2ecf20Sopenharmony_ci} 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ciint w_e_end_ov_reply(struct drbd_work *w, int cancel) 12908c2ecf20Sopenharmony_ci{ 12918c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 12928c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 12938c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 12948c2ecf20Sopenharmony_ci struct digest_info *di; 12958c2ecf20Sopenharmony_ci void *digest; 12968c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 12978c2ecf20Sopenharmony_ci unsigned int size = peer_req->i.size; 12988c2ecf20Sopenharmony_ci int digest_size; 12998c2ecf20Sopenharmony_ci int err, eq = 0; 13008c2ecf20Sopenharmony_ci bool stop_sector_reached = false; 13018c2ecf20Sopenharmony_ci 13028c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 13038c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 13048c2ecf20Sopenharmony_ci dec_unacked(device); 13058c2ecf20Sopenharmony_ci return 0; 13068c2ecf20Sopenharmony_ci } 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 13098c2ecf20Sopenharmony_ci * the resync lru has been cleaned up already */ 13108c2ecf20Sopenharmony_ci if (get_ldev(device)) { 13118c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, peer_req->i.sector); 13128c2ecf20Sopenharmony_ci put_ldev(device); 13138c2ecf20Sopenharmony_ci } 13148c2ecf20Sopenharmony_ci 13158c2ecf20Sopenharmony_ci di = peer_req->digest; 13168c2ecf20Sopenharmony_ci 13178c2ecf20Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 13188c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); 13198c2ecf20Sopenharmony_ci digest = kmalloc(digest_size, GFP_NOIO); 13208c2ecf20Sopenharmony_ci if (digest) { 13218c2ecf20Sopenharmony_ci drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci D_ASSERT(device, digest_size == di->digest_size); 13248c2ecf20Sopenharmony_ci eq = !memcmp(digest, di->digest, digest_size); 13258c2ecf20Sopenharmony_ci kfree(digest); 13268c2ecf20Sopenharmony_ci } 13278c2ecf20Sopenharmony_ci } 13288c2ecf20Sopenharmony_ci 13298c2ecf20Sopenharmony_ci /* Free peer_req and pages before send. 13308c2ecf20Sopenharmony_ci * In case we block on congestion, we could otherwise run into 13318c2ecf20Sopenharmony_ci * some distributed deadlock, if the other side blocks on 13328c2ecf20Sopenharmony_ci * congestion as well, because our receiver blocks in 13338c2ecf20Sopenharmony_ci * drbd_alloc_pages due to pp_in_use > max_buffers. */ 13348c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 13358c2ecf20Sopenharmony_ci if (!eq) 13368c2ecf20Sopenharmony_ci drbd_ov_out_of_sync_found(device, sector, size); 13378c2ecf20Sopenharmony_ci else 13388c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_ci err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, 13418c2ecf20Sopenharmony_ci eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci dec_unacked(device); 13448c2ecf20Sopenharmony_ci 13458c2ecf20Sopenharmony_ci --device->ov_left; 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci /* let's advance progress step marks only for every other megabyte */ 13488c2ecf20Sopenharmony_ci if ((device->ov_left & 0x200) == 0x200) 13498c2ecf20Sopenharmony_ci drbd_advance_rs_marks(device, device->ov_left); 13508c2ecf20Sopenharmony_ci 13518c2ecf20Sopenharmony_ci stop_sector_reached = verify_can_do_stop_sector(device) && 13528c2ecf20Sopenharmony_ci (sector + (size>>9)) >= device->ov_stop_sector; 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci if (device->ov_left == 0 || stop_sector_reached) { 13558c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 13568c2ecf20Sopenharmony_ci drbd_resync_finished(device); 13578c2ecf20Sopenharmony_ci } 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_ci return err; 13608c2ecf20Sopenharmony_ci} 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci/* FIXME 13638c2ecf20Sopenharmony_ci * We need to track the number of pending barrier acks, 13648c2ecf20Sopenharmony_ci * and to be able to wait for them. 13658c2ecf20Sopenharmony_ci * See also comment in drbd_adm_attach before drbd_suspend_io. 13668c2ecf20Sopenharmony_ci */ 13678c2ecf20Sopenharmony_cistatic int drbd_send_barrier(struct drbd_connection *connection) 13688c2ecf20Sopenharmony_ci{ 13698c2ecf20Sopenharmony_ci struct p_barrier *p; 13708c2ecf20Sopenharmony_ci struct drbd_socket *sock; 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci sock = &connection->data; 13738c2ecf20Sopenharmony_ci p = conn_prepare_command(connection, sock); 13748c2ecf20Sopenharmony_ci if (!p) 13758c2ecf20Sopenharmony_ci return -EIO; 13768c2ecf20Sopenharmony_ci p->barrier = connection->send.current_epoch_nr; 13778c2ecf20Sopenharmony_ci p->pad = 0; 13788c2ecf20Sopenharmony_ci connection->send.current_epoch_writes = 0; 13798c2ecf20Sopenharmony_ci connection->send.last_sent_barrier_jif = jiffies; 13808c2ecf20Sopenharmony_ci 13818c2ecf20Sopenharmony_ci return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); 13828c2ecf20Sopenharmony_ci} 13838c2ecf20Sopenharmony_ci 13848c2ecf20Sopenharmony_cistatic int pd_send_unplug_remote(struct drbd_peer_device *pd) 13858c2ecf20Sopenharmony_ci{ 13868c2ecf20Sopenharmony_ci struct drbd_socket *sock = &pd->connection->data; 13878c2ecf20Sopenharmony_ci if (!drbd_prepare_command(pd, sock)) 13888c2ecf20Sopenharmony_ci return -EIO; 13898c2ecf20Sopenharmony_ci return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0); 13908c2ecf20Sopenharmony_ci} 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_ciint w_send_write_hint(struct drbd_work *w, int cancel) 13938c2ecf20Sopenharmony_ci{ 13948c2ecf20Sopenharmony_ci struct drbd_device *device = 13958c2ecf20Sopenharmony_ci container_of(w, struct drbd_device, unplug_work); 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci if (cancel) 13988c2ecf20Sopenharmony_ci return 0; 13998c2ecf20Sopenharmony_ci return pd_send_unplug_remote(first_peer_device(device)); 14008c2ecf20Sopenharmony_ci} 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_cistatic void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch) 14038c2ecf20Sopenharmony_ci{ 14048c2ecf20Sopenharmony_ci if (!connection->send.seen_any_write_yet) { 14058c2ecf20Sopenharmony_ci connection->send.seen_any_write_yet = true; 14068c2ecf20Sopenharmony_ci connection->send.current_epoch_nr = epoch; 14078c2ecf20Sopenharmony_ci connection->send.current_epoch_writes = 0; 14088c2ecf20Sopenharmony_ci connection->send.last_sent_barrier_jif = jiffies; 14098c2ecf20Sopenharmony_ci } 14108c2ecf20Sopenharmony_ci} 14118c2ecf20Sopenharmony_ci 14128c2ecf20Sopenharmony_cistatic void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) 14138c2ecf20Sopenharmony_ci{ 14148c2ecf20Sopenharmony_ci /* re-init if first write on this connection */ 14158c2ecf20Sopenharmony_ci if (!connection->send.seen_any_write_yet) 14168c2ecf20Sopenharmony_ci return; 14178c2ecf20Sopenharmony_ci if (connection->send.current_epoch_nr != epoch) { 14188c2ecf20Sopenharmony_ci if (connection->send.current_epoch_writes) 14198c2ecf20Sopenharmony_ci drbd_send_barrier(connection); 14208c2ecf20Sopenharmony_ci connection->send.current_epoch_nr = epoch; 14218c2ecf20Sopenharmony_ci } 14228c2ecf20Sopenharmony_ci} 14238c2ecf20Sopenharmony_ci 14248c2ecf20Sopenharmony_ciint w_send_out_of_sync(struct drbd_work *w, int cancel) 14258c2ecf20Sopenharmony_ci{ 14268c2ecf20Sopenharmony_ci struct drbd_request *req = container_of(w, struct drbd_request, w); 14278c2ecf20Sopenharmony_ci struct drbd_device *device = req->device; 14288c2ecf20Sopenharmony_ci struct drbd_peer_device *const peer_device = first_peer_device(device); 14298c2ecf20Sopenharmony_ci struct drbd_connection *const connection = peer_device->connection; 14308c2ecf20Sopenharmony_ci int err; 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 14338c2ecf20Sopenharmony_ci req_mod(req, SEND_CANCELED); 14348c2ecf20Sopenharmony_ci return 0; 14358c2ecf20Sopenharmony_ci } 14368c2ecf20Sopenharmony_ci req->pre_send_jif = jiffies; 14378c2ecf20Sopenharmony_ci 14388c2ecf20Sopenharmony_ci /* this time, no connection->send.current_epoch_writes++; 14398c2ecf20Sopenharmony_ci * If it was sent, it was the closing barrier for the last 14408c2ecf20Sopenharmony_ci * replicated epoch, before we went into AHEAD mode. 14418c2ecf20Sopenharmony_ci * No more barriers will be sent, until we leave AHEAD mode again. */ 14428c2ecf20Sopenharmony_ci maybe_send_barrier(connection, req->epoch); 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci err = drbd_send_out_of_sync(peer_device, req); 14458c2ecf20Sopenharmony_ci req_mod(req, OOS_HANDED_TO_NETWORK); 14468c2ecf20Sopenharmony_ci 14478c2ecf20Sopenharmony_ci return err; 14488c2ecf20Sopenharmony_ci} 14498c2ecf20Sopenharmony_ci 14508c2ecf20Sopenharmony_ci/** 14518c2ecf20Sopenharmony_ci * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 14528c2ecf20Sopenharmony_ci * @w: work object. 14538c2ecf20Sopenharmony_ci * @cancel: The connection will be closed anyways 14548c2ecf20Sopenharmony_ci */ 14558c2ecf20Sopenharmony_ciint w_send_dblock(struct drbd_work *w, int cancel) 14568c2ecf20Sopenharmony_ci{ 14578c2ecf20Sopenharmony_ci struct drbd_request *req = container_of(w, struct drbd_request, w); 14588c2ecf20Sopenharmony_ci struct drbd_device *device = req->device; 14598c2ecf20Sopenharmony_ci struct drbd_peer_device *const peer_device = first_peer_device(device); 14608c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_device->connection; 14618c2ecf20Sopenharmony_ci bool do_send_unplug = req->rq_state & RQ_UNPLUG; 14628c2ecf20Sopenharmony_ci int err; 14638c2ecf20Sopenharmony_ci 14648c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 14658c2ecf20Sopenharmony_ci req_mod(req, SEND_CANCELED); 14668c2ecf20Sopenharmony_ci return 0; 14678c2ecf20Sopenharmony_ci } 14688c2ecf20Sopenharmony_ci req->pre_send_jif = jiffies; 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci re_init_if_first_write(connection, req->epoch); 14718c2ecf20Sopenharmony_ci maybe_send_barrier(connection, req->epoch); 14728c2ecf20Sopenharmony_ci connection->send.current_epoch_writes++; 14738c2ecf20Sopenharmony_ci 14748c2ecf20Sopenharmony_ci err = drbd_send_dblock(peer_device, req); 14758c2ecf20Sopenharmony_ci req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_ci if (do_send_unplug && !err) 14788c2ecf20Sopenharmony_ci pd_send_unplug_remote(peer_device); 14798c2ecf20Sopenharmony_ci 14808c2ecf20Sopenharmony_ci return err; 14818c2ecf20Sopenharmony_ci} 14828c2ecf20Sopenharmony_ci 14838c2ecf20Sopenharmony_ci/** 14848c2ecf20Sopenharmony_ci * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet 14858c2ecf20Sopenharmony_ci * @w: work object. 14868c2ecf20Sopenharmony_ci * @cancel: The connection will be closed anyways 14878c2ecf20Sopenharmony_ci */ 14888c2ecf20Sopenharmony_ciint w_send_read_req(struct drbd_work *w, int cancel) 14898c2ecf20Sopenharmony_ci{ 14908c2ecf20Sopenharmony_ci struct drbd_request *req = container_of(w, struct drbd_request, w); 14918c2ecf20Sopenharmony_ci struct drbd_device *device = req->device; 14928c2ecf20Sopenharmony_ci struct drbd_peer_device *const peer_device = first_peer_device(device); 14938c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_device->connection; 14948c2ecf20Sopenharmony_ci bool do_send_unplug = req->rq_state & RQ_UNPLUG; 14958c2ecf20Sopenharmony_ci int err; 14968c2ecf20Sopenharmony_ci 14978c2ecf20Sopenharmony_ci if (unlikely(cancel)) { 14988c2ecf20Sopenharmony_ci req_mod(req, SEND_CANCELED); 14998c2ecf20Sopenharmony_ci return 0; 15008c2ecf20Sopenharmony_ci } 15018c2ecf20Sopenharmony_ci req->pre_send_jif = jiffies; 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci /* Even read requests may close a write epoch, 15048c2ecf20Sopenharmony_ci * if there was any yet. */ 15058c2ecf20Sopenharmony_ci maybe_send_barrier(connection, req->epoch); 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, 15088c2ecf20Sopenharmony_ci (unsigned long)req); 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_ci req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 15118c2ecf20Sopenharmony_ci 15128c2ecf20Sopenharmony_ci if (do_send_unplug && !err) 15138c2ecf20Sopenharmony_ci pd_send_unplug_remote(peer_device); 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_ci return err; 15168c2ecf20Sopenharmony_ci} 15178c2ecf20Sopenharmony_ci 15188c2ecf20Sopenharmony_ciint w_restart_disk_io(struct drbd_work *w, int cancel) 15198c2ecf20Sopenharmony_ci{ 15208c2ecf20Sopenharmony_ci struct drbd_request *req = container_of(w, struct drbd_request, w); 15218c2ecf20Sopenharmony_ci struct drbd_device *device = req->device; 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_ci if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 15248c2ecf20Sopenharmony_ci drbd_al_begin_io(device, &req->i); 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci drbd_req_make_private_bio(req, req->master_bio); 15278c2ecf20Sopenharmony_ci bio_set_dev(req->private_bio, device->ldev->backing_bdev); 15288c2ecf20Sopenharmony_ci submit_bio_noacct(req->private_bio); 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci return 0; 15318c2ecf20Sopenharmony_ci} 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_cistatic int _drbd_may_sync_now(struct drbd_device *device) 15348c2ecf20Sopenharmony_ci{ 15358c2ecf20Sopenharmony_ci struct drbd_device *odev = device; 15368c2ecf20Sopenharmony_ci int resync_after; 15378c2ecf20Sopenharmony_ci 15388c2ecf20Sopenharmony_ci while (1) { 15398c2ecf20Sopenharmony_ci if (!odev->ldev || odev->state.disk == D_DISKLESS) 15408c2ecf20Sopenharmony_ci return 1; 15418c2ecf20Sopenharmony_ci rcu_read_lock(); 15428c2ecf20Sopenharmony_ci resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 15438c2ecf20Sopenharmony_ci rcu_read_unlock(); 15448c2ecf20Sopenharmony_ci if (resync_after == -1) 15458c2ecf20Sopenharmony_ci return 1; 15468c2ecf20Sopenharmony_ci odev = minor_to_device(resync_after); 15478c2ecf20Sopenharmony_ci if (!odev) 15488c2ecf20Sopenharmony_ci return 1; 15498c2ecf20Sopenharmony_ci if ((odev->state.conn >= C_SYNC_SOURCE && 15508c2ecf20Sopenharmony_ci odev->state.conn <= C_PAUSED_SYNC_T) || 15518c2ecf20Sopenharmony_ci odev->state.aftr_isp || odev->state.peer_isp || 15528c2ecf20Sopenharmony_ci odev->state.user_isp) 15538c2ecf20Sopenharmony_ci return 0; 15548c2ecf20Sopenharmony_ci } 15558c2ecf20Sopenharmony_ci} 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci/** 15588c2ecf20Sopenharmony_ci * drbd_pause_after() - Pause resync on all devices that may not resync now 15598c2ecf20Sopenharmony_ci * @device: DRBD device. 15608c2ecf20Sopenharmony_ci * 15618c2ecf20Sopenharmony_ci * Called from process context only (admin command and after_state_ch). 15628c2ecf20Sopenharmony_ci */ 15638c2ecf20Sopenharmony_cistatic bool drbd_pause_after(struct drbd_device *device) 15648c2ecf20Sopenharmony_ci{ 15658c2ecf20Sopenharmony_ci bool changed = false; 15668c2ecf20Sopenharmony_ci struct drbd_device *odev; 15678c2ecf20Sopenharmony_ci int i; 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci rcu_read_lock(); 15708c2ecf20Sopenharmony_ci idr_for_each_entry(&drbd_devices, odev, i) { 15718c2ecf20Sopenharmony_ci if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 15728c2ecf20Sopenharmony_ci continue; 15738c2ecf20Sopenharmony_ci if (!_drbd_may_sync_now(odev) && 15748c2ecf20Sopenharmony_ci _drbd_set_state(_NS(odev, aftr_isp, 1), 15758c2ecf20Sopenharmony_ci CS_HARD, NULL) != SS_NOTHING_TO_DO) 15768c2ecf20Sopenharmony_ci changed = true; 15778c2ecf20Sopenharmony_ci } 15788c2ecf20Sopenharmony_ci rcu_read_unlock(); 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci return changed; 15818c2ecf20Sopenharmony_ci} 15828c2ecf20Sopenharmony_ci 15838c2ecf20Sopenharmony_ci/** 15848c2ecf20Sopenharmony_ci * drbd_resume_next() - Resume resync on all devices that may resync now 15858c2ecf20Sopenharmony_ci * @device: DRBD device. 15868c2ecf20Sopenharmony_ci * 15878c2ecf20Sopenharmony_ci * Called from process context only (admin command and worker). 15888c2ecf20Sopenharmony_ci */ 15898c2ecf20Sopenharmony_cistatic bool drbd_resume_next(struct drbd_device *device) 15908c2ecf20Sopenharmony_ci{ 15918c2ecf20Sopenharmony_ci bool changed = false; 15928c2ecf20Sopenharmony_ci struct drbd_device *odev; 15938c2ecf20Sopenharmony_ci int i; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci rcu_read_lock(); 15968c2ecf20Sopenharmony_ci idr_for_each_entry(&drbd_devices, odev, i) { 15978c2ecf20Sopenharmony_ci if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 15988c2ecf20Sopenharmony_ci continue; 15998c2ecf20Sopenharmony_ci if (odev->state.aftr_isp) { 16008c2ecf20Sopenharmony_ci if (_drbd_may_sync_now(odev) && 16018c2ecf20Sopenharmony_ci _drbd_set_state(_NS(odev, aftr_isp, 0), 16028c2ecf20Sopenharmony_ci CS_HARD, NULL) != SS_NOTHING_TO_DO) 16038c2ecf20Sopenharmony_ci changed = true; 16048c2ecf20Sopenharmony_ci } 16058c2ecf20Sopenharmony_ci } 16068c2ecf20Sopenharmony_ci rcu_read_unlock(); 16078c2ecf20Sopenharmony_ci return changed; 16088c2ecf20Sopenharmony_ci} 16098c2ecf20Sopenharmony_ci 16108c2ecf20Sopenharmony_civoid resume_next_sg(struct drbd_device *device) 16118c2ecf20Sopenharmony_ci{ 16128c2ecf20Sopenharmony_ci lock_all_resources(); 16138c2ecf20Sopenharmony_ci drbd_resume_next(device); 16148c2ecf20Sopenharmony_ci unlock_all_resources(); 16158c2ecf20Sopenharmony_ci} 16168c2ecf20Sopenharmony_ci 16178c2ecf20Sopenharmony_civoid suspend_other_sg(struct drbd_device *device) 16188c2ecf20Sopenharmony_ci{ 16198c2ecf20Sopenharmony_ci lock_all_resources(); 16208c2ecf20Sopenharmony_ci drbd_pause_after(device); 16218c2ecf20Sopenharmony_ci unlock_all_resources(); 16228c2ecf20Sopenharmony_ci} 16238c2ecf20Sopenharmony_ci 16248c2ecf20Sopenharmony_ci/* caller must lock_all_resources() */ 16258c2ecf20Sopenharmony_cienum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) 16268c2ecf20Sopenharmony_ci{ 16278c2ecf20Sopenharmony_ci struct drbd_device *odev; 16288c2ecf20Sopenharmony_ci int resync_after; 16298c2ecf20Sopenharmony_ci 16308c2ecf20Sopenharmony_ci if (o_minor == -1) 16318c2ecf20Sopenharmony_ci return NO_ERROR; 16328c2ecf20Sopenharmony_ci if (o_minor < -1 || o_minor > MINORMASK) 16338c2ecf20Sopenharmony_ci return ERR_RESYNC_AFTER; 16348c2ecf20Sopenharmony_ci 16358c2ecf20Sopenharmony_ci /* check for loops */ 16368c2ecf20Sopenharmony_ci odev = minor_to_device(o_minor); 16378c2ecf20Sopenharmony_ci while (1) { 16388c2ecf20Sopenharmony_ci if (odev == device) 16398c2ecf20Sopenharmony_ci return ERR_RESYNC_AFTER_CYCLE; 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci /* You are free to depend on diskless, non-existing, 16428c2ecf20Sopenharmony_ci * or not yet/no longer existing minors. 16438c2ecf20Sopenharmony_ci * We only reject dependency loops. 16448c2ecf20Sopenharmony_ci * We cannot follow the dependency chain beyond a detached or 16458c2ecf20Sopenharmony_ci * missing minor. 16468c2ecf20Sopenharmony_ci */ 16478c2ecf20Sopenharmony_ci if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS) 16488c2ecf20Sopenharmony_ci return NO_ERROR; 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ci rcu_read_lock(); 16518c2ecf20Sopenharmony_ci resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 16528c2ecf20Sopenharmony_ci rcu_read_unlock(); 16538c2ecf20Sopenharmony_ci /* dependency chain ends here, no cycles. */ 16548c2ecf20Sopenharmony_ci if (resync_after == -1) 16558c2ecf20Sopenharmony_ci return NO_ERROR; 16568c2ecf20Sopenharmony_ci 16578c2ecf20Sopenharmony_ci /* follow the dependency chain */ 16588c2ecf20Sopenharmony_ci odev = minor_to_device(resync_after); 16598c2ecf20Sopenharmony_ci } 16608c2ecf20Sopenharmony_ci} 16618c2ecf20Sopenharmony_ci 16628c2ecf20Sopenharmony_ci/* caller must lock_all_resources() */ 16638c2ecf20Sopenharmony_civoid drbd_resync_after_changed(struct drbd_device *device) 16648c2ecf20Sopenharmony_ci{ 16658c2ecf20Sopenharmony_ci int changed; 16668c2ecf20Sopenharmony_ci 16678c2ecf20Sopenharmony_ci do { 16688c2ecf20Sopenharmony_ci changed = drbd_pause_after(device); 16698c2ecf20Sopenharmony_ci changed |= drbd_resume_next(device); 16708c2ecf20Sopenharmony_ci } while (changed); 16718c2ecf20Sopenharmony_ci} 16728c2ecf20Sopenharmony_ci 16738c2ecf20Sopenharmony_civoid drbd_rs_controller_reset(struct drbd_device *device) 16748c2ecf20Sopenharmony_ci{ 16758c2ecf20Sopenharmony_ci struct gendisk *disk = device->ldev->backing_bdev->bd_disk; 16768c2ecf20Sopenharmony_ci struct fifo_buffer *plan; 16778c2ecf20Sopenharmony_ci 16788c2ecf20Sopenharmony_ci atomic_set(&device->rs_sect_in, 0); 16798c2ecf20Sopenharmony_ci atomic_set(&device->rs_sect_ev, 0); 16808c2ecf20Sopenharmony_ci device->rs_in_flight = 0; 16818c2ecf20Sopenharmony_ci device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); 16828c2ecf20Sopenharmony_ci 16838c2ecf20Sopenharmony_ci /* Updating the RCU protected object in place is necessary since 16848c2ecf20Sopenharmony_ci this function gets called from atomic context. 16858c2ecf20Sopenharmony_ci It is valid since all other updates also lead to an completely 16868c2ecf20Sopenharmony_ci empty fifo */ 16878c2ecf20Sopenharmony_ci rcu_read_lock(); 16888c2ecf20Sopenharmony_ci plan = rcu_dereference(device->rs_plan_s); 16898c2ecf20Sopenharmony_ci plan->total = 0; 16908c2ecf20Sopenharmony_ci fifo_set(plan, 0); 16918c2ecf20Sopenharmony_ci rcu_read_unlock(); 16928c2ecf20Sopenharmony_ci} 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_civoid start_resync_timer_fn(struct timer_list *t) 16958c2ecf20Sopenharmony_ci{ 16968c2ecf20Sopenharmony_ci struct drbd_device *device = from_timer(device, t, start_resync_timer); 16978c2ecf20Sopenharmony_ci drbd_device_post_work(device, RS_START); 16988c2ecf20Sopenharmony_ci} 16998c2ecf20Sopenharmony_ci 17008c2ecf20Sopenharmony_cistatic void do_start_resync(struct drbd_device *device) 17018c2ecf20Sopenharmony_ci{ 17028c2ecf20Sopenharmony_ci if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { 17038c2ecf20Sopenharmony_ci drbd_warn(device, "postponing start_resync ...\n"); 17048c2ecf20Sopenharmony_ci device->start_resync_timer.expires = jiffies + HZ/10; 17058c2ecf20Sopenharmony_ci add_timer(&device->start_resync_timer); 17068c2ecf20Sopenharmony_ci return; 17078c2ecf20Sopenharmony_ci } 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci drbd_start_resync(device, C_SYNC_SOURCE); 17108c2ecf20Sopenharmony_ci clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); 17118c2ecf20Sopenharmony_ci} 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_cistatic bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) 17148c2ecf20Sopenharmony_ci{ 17158c2ecf20Sopenharmony_ci bool csums_after_crash_only; 17168c2ecf20Sopenharmony_ci rcu_read_lock(); 17178c2ecf20Sopenharmony_ci csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; 17188c2ecf20Sopenharmony_ci rcu_read_unlock(); 17198c2ecf20Sopenharmony_ci return connection->agreed_pro_version >= 89 && /* supported? */ 17208c2ecf20Sopenharmony_ci connection->csums_tfm && /* configured? */ 17218c2ecf20Sopenharmony_ci (csums_after_crash_only == false /* use for each resync? */ 17228c2ecf20Sopenharmony_ci || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ 17238c2ecf20Sopenharmony_ci} 17248c2ecf20Sopenharmony_ci 17258c2ecf20Sopenharmony_ci/** 17268c2ecf20Sopenharmony_ci * drbd_start_resync() - Start the resync process 17278c2ecf20Sopenharmony_ci * @device: DRBD device. 17288c2ecf20Sopenharmony_ci * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET 17298c2ecf20Sopenharmony_ci * 17308c2ecf20Sopenharmony_ci * This function might bring you directly into one of the 17318c2ecf20Sopenharmony_ci * C_PAUSED_SYNC_* states. 17328c2ecf20Sopenharmony_ci */ 17338c2ecf20Sopenharmony_civoid drbd_start_resync(struct drbd_device *device, enum drbd_conns side) 17348c2ecf20Sopenharmony_ci{ 17358c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = first_peer_device(device); 17368c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 17378c2ecf20Sopenharmony_ci union drbd_state ns; 17388c2ecf20Sopenharmony_ci int r; 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { 17418c2ecf20Sopenharmony_ci drbd_err(device, "Resync already running!\n"); 17428c2ecf20Sopenharmony_ci return; 17438c2ecf20Sopenharmony_ci } 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci if (!connection) { 17468c2ecf20Sopenharmony_ci drbd_err(device, "No connection to peer, aborting!\n"); 17478c2ecf20Sopenharmony_ci return; 17488c2ecf20Sopenharmony_ci } 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci if (!test_bit(B_RS_H_DONE, &device->flags)) { 17518c2ecf20Sopenharmony_ci if (side == C_SYNC_TARGET) { 17528c2ecf20Sopenharmony_ci /* Since application IO was locked out during C_WF_BITMAP_T and 17538c2ecf20Sopenharmony_ci C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 17548c2ecf20Sopenharmony_ci we check that we might make the data inconsistent. */ 17558c2ecf20Sopenharmony_ci r = drbd_khelper(device, "before-resync-target"); 17568c2ecf20Sopenharmony_ci r = (r >> 8) & 0xff; 17578c2ecf20Sopenharmony_ci if (r > 0) { 17588c2ecf20Sopenharmony_ci drbd_info(device, "before-resync-target handler returned %d, " 17598c2ecf20Sopenharmony_ci "dropping connection.\n", r); 17608c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 17618c2ecf20Sopenharmony_ci return; 17628c2ecf20Sopenharmony_ci } 17638c2ecf20Sopenharmony_ci } else /* C_SYNC_SOURCE */ { 17648c2ecf20Sopenharmony_ci r = drbd_khelper(device, "before-resync-source"); 17658c2ecf20Sopenharmony_ci r = (r >> 8) & 0xff; 17668c2ecf20Sopenharmony_ci if (r > 0) { 17678c2ecf20Sopenharmony_ci if (r == 3) { 17688c2ecf20Sopenharmony_ci drbd_info(device, "before-resync-source handler returned %d, " 17698c2ecf20Sopenharmony_ci "ignoring. Old userland tools?", r); 17708c2ecf20Sopenharmony_ci } else { 17718c2ecf20Sopenharmony_ci drbd_info(device, "before-resync-source handler returned %d, " 17728c2ecf20Sopenharmony_ci "dropping connection.\n", r); 17738c2ecf20Sopenharmony_ci conn_request_state(connection, 17748c2ecf20Sopenharmony_ci NS(conn, C_DISCONNECTING), CS_HARD); 17758c2ecf20Sopenharmony_ci return; 17768c2ecf20Sopenharmony_ci } 17778c2ecf20Sopenharmony_ci } 17788c2ecf20Sopenharmony_ci } 17798c2ecf20Sopenharmony_ci } 17808c2ecf20Sopenharmony_ci 17818c2ecf20Sopenharmony_ci if (current == connection->worker.task) { 17828c2ecf20Sopenharmony_ci /* The worker should not sleep waiting for state_mutex, 17838c2ecf20Sopenharmony_ci that can take long */ 17848c2ecf20Sopenharmony_ci if (!mutex_trylock(device->state_mutex)) { 17858c2ecf20Sopenharmony_ci set_bit(B_RS_H_DONE, &device->flags); 17868c2ecf20Sopenharmony_ci device->start_resync_timer.expires = jiffies + HZ/5; 17878c2ecf20Sopenharmony_ci add_timer(&device->start_resync_timer); 17888c2ecf20Sopenharmony_ci return; 17898c2ecf20Sopenharmony_ci } 17908c2ecf20Sopenharmony_ci } else { 17918c2ecf20Sopenharmony_ci mutex_lock(device->state_mutex); 17928c2ecf20Sopenharmony_ci } 17938c2ecf20Sopenharmony_ci 17948c2ecf20Sopenharmony_ci lock_all_resources(); 17958c2ecf20Sopenharmony_ci clear_bit(B_RS_H_DONE, &device->flags); 17968c2ecf20Sopenharmony_ci /* Did some connection breakage or IO error race with us? */ 17978c2ecf20Sopenharmony_ci if (device->state.conn < C_CONNECTED 17988c2ecf20Sopenharmony_ci || !get_ldev_if_state(device, D_NEGOTIATING)) { 17998c2ecf20Sopenharmony_ci unlock_all_resources(); 18008c2ecf20Sopenharmony_ci goto out; 18018c2ecf20Sopenharmony_ci } 18028c2ecf20Sopenharmony_ci 18038c2ecf20Sopenharmony_ci ns = drbd_read_state(device); 18048c2ecf20Sopenharmony_ci 18058c2ecf20Sopenharmony_ci ns.aftr_isp = !_drbd_may_sync_now(device); 18068c2ecf20Sopenharmony_ci 18078c2ecf20Sopenharmony_ci ns.conn = side; 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_ci if (side == C_SYNC_TARGET) 18108c2ecf20Sopenharmony_ci ns.disk = D_INCONSISTENT; 18118c2ecf20Sopenharmony_ci else /* side == C_SYNC_SOURCE */ 18128c2ecf20Sopenharmony_ci ns.pdsk = D_INCONSISTENT; 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci r = _drbd_set_state(device, ns, CS_VERBOSE, NULL); 18158c2ecf20Sopenharmony_ci ns = drbd_read_state(device); 18168c2ecf20Sopenharmony_ci 18178c2ecf20Sopenharmony_ci if (ns.conn < C_CONNECTED) 18188c2ecf20Sopenharmony_ci r = SS_UNKNOWN_ERROR; 18198c2ecf20Sopenharmony_ci 18208c2ecf20Sopenharmony_ci if (r == SS_SUCCESS) { 18218c2ecf20Sopenharmony_ci unsigned long tw = drbd_bm_total_weight(device); 18228c2ecf20Sopenharmony_ci unsigned long now = jiffies; 18238c2ecf20Sopenharmony_ci int i; 18248c2ecf20Sopenharmony_ci 18258c2ecf20Sopenharmony_ci device->rs_failed = 0; 18268c2ecf20Sopenharmony_ci device->rs_paused = 0; 18278c2ecf20Sopenharmony_ci device->rs_same_csum = 0; 18288c2ecf20Sopenharmony_ci device->rs_last_sect_ev = 0; 18298c2ecf20Sopenharmony_ci device->rs_total = tw; 18308c2ecf20Sopenharmony_ci device->rs_start = now; 18318c2ecf20Sopenharmony_ci for (i = 0; i < DRBD_SYNC_MARKS; i++) { 18328c2ecf20Sopenharmony_ci device->rs_mark_left[i] = tw; 18338c2ecf20Sopenharmony_ci device->rs_mark_time[i] = now; 18348c2ecf20Sopenharmony_ci } 18358c2ecf20Sopenharmony_ci drbd_pause_after(device); 18368c2ecf20Sopenharmony_ci /* Forget potentially stale cached per resync extent bit-counts. 18378c2ecf20Sopenharmony_ci * Open coded drbd_rs_cancel_all(device), we already have IRQs 18388c2ecf20Sopenharmony_ci * disabled, and know the disk state is ok. */ 18398c2ecf20Sopenharmony_ci spin_lock(&device->al_lock); 18408c2ecf20Sopenharmony_ci lc_reset(device->resync); 18418c2ecf20Sopenharmony_ci device->resync_locked = 0; 18428c2ecf20Sopenharmony_ci device->resync_wenr = LC_FREE; 18438c2ecf20Sopenharmony_ci spin_unlock(&device->al_lock); 18448c2ecf20Sopenharmony_ci } 18458c2ecf20Sopenharmony_ci unlock_all_resources(); 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_ci if (r == SS_SUCCESS) { 18488c2ecf20Sopenharmony_ci wake_up(&device->al_wait); /* for lc_reset() above */ 18498c2ecf20Sopenharmony_ci /* reset rs_last_bcast when a resync or verify is started, 18508c2ecf20Sopenharmony_ci * to deal with potential jiffies wrap. */ 18518c2ecf20Sopenharmony_ci device->rs_last_bcast = jiffies - HZ; 18528c2ecf20Sopenharmony_ci 18538c2ecf20Sopenharmony_ci drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 18548c2ecf20Sopenharmony_ci drbd_conn_str(ns.conn), 18558c2ecf20Sopenharmony_ci (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), 18568c2ecf20Sopenharmony_ci (unsigned long) device->rs_total); 18578c2ecf20Sopenharmony_ci if (side == C_SYNC_TARGET) { 18588c2ecf20Sopenharmony_ci device->bm_resync_fo = 0; 18598c2ecf20Sopenharmony_ci device->use_csums = use_checksum_based_resync(connection, device); 18608c2ecf20Sopenharmony_ci } else { 18618c2ecf20Sopenharmony_ci device->use_csums = false; 18628c2ecf20Sopenharmony_ci } 18638c2ecf20Sopenharmony_ci 18648c2ecf20Sopenharmony_ci /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 18658c2ecf20Sopenharmony_ci * with w_send_oos, or the sync target will get confused as to 18668c2ecf20Sopenharmony_ci * how much bits to resync. We cannot do that always, because for an 18678c2ecf20Sopenharmony_ci * empty resync and protocol < 95, we need to do it here, as we call 18688c2ecf20Sopenharmony_ci * drbd_resync_finished from here in that case. 18698c2ecf20Sopenharmony_ci * We drbd_gen_and_send_sync_uuid here for protocol < 96, 18708c2ecf20Sopenharmony_ci * and from after_state_ch otherwise. */ 18718c2ecf20Sopenharmony_ci if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) 18728c2ecf20Sopenharmony_ci drbd_gen_and_send_sync_uuid(peer_device); 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 95 && device->rs_total == 0) { 18758c2ecf20Sopenharmony_ci /* This still has a race (about when exactly the peers 18768c2ecf20Sopenharmony_ci * detect connection loss) that can lead to a full sync 18778c2ecf20Sopenharmony_ci * on next handshake. In 8.3.9 we fixed this with explicit 18788c2ecf20Sopenharmony_ci * resync-finished notifications, but the fix 18798c2ecf20Sopenharmony_ci * introduces a protocol change. Sleeping for some 18808c2ecf20Sopenharmony_ci * time longer than the ping interval + timeout on the 18818c2ecf20Sopenharmony_ci * SyncSource, to give the SyncTarget the chance to 18828c2ecf20Sopenharmony_ci * detect connection loss, then waiting for a ping 18838c2ecf20Sopenharmony_ci * response (implicit in drbd_resync_finished) reduces 18848c2ecf20Sopenharmony_ci * the race considerably, but does not solve it. */ 18858c2ecf20Sopenharmony_ci if (side == C_SYNC_SOURCE) { 18868c2ecf20Sopenharmony_ci struct net_conf *nc; 18878c2ecf20Sopenharmony_ci int timeo; 18888c2ecf20Sopenharmony_ci 18898c2ecf20Sopenharmony_ci rcu_read_lock(); 18908c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 18918c2ecf20Sopenharmony_ci timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; 18928c2ecf20Sopenharmony_ci rcu_read_unlock(); 18938c2ecf20Sopenharmony_ci schedule_timeout_interruptible(timeo); 18948c2ecf20Sopenharmony_ci } 18958c2ecf20Sopenharmony_ci drbd_resync_finished(device); 18968c2ecf20Sopenharmony_ci } 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci drbd_rs_controller_reset(device); 18998c2ecf20Sopenharmony_ci /* ns.conn may already be != device->state.conn, 19008c2ecf20Sopenharmony_ci * we may have been paused in between, or become paused until 19018c2ecf20Sopenharmony_ci * the timer triggers. 19028c2ecf20Sopenharmony_ci * No matter, that is handled in resync_timer_fn() */ 19038c2ecf20Sopenharmony_ci if (ns.conn == C_SYNC_TARGET) 19048c2ecf20Sopenharmony_ci mod_timer(&device->resync_timer, jiffies); 19058c2ecf20Sopenharmony_ci 19068c2ecf20Sopenharmony_ci drbd_md_sync(device); 19078c2ecf20Sopenharmony_ci } 19088c2ecf20Sopenharmony_ci put_ldev(device); 19098c2ecf20Sopenharmony_ciout: 19108c2ecf20Sopenharmony_ci mutex_unlock(device->state_mutex); 19118c2ecf20Sopenharmony_ci} 19128c2ecf20Sopenharmony_ci 19138c2ecf20Sopenharmony_cistatic void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) 19148c2ecf20Sopenharmony_ci{ 19158c2ecf20Sopenharmony_ci struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; 19168c2ecf20Sopenharmony_ci device->rs_last_bcast = jiffies; 19178c2ecf20Sopenharmony_ci 19188c2ecf20Sopenharmony_ci if (!get_ldev(device)) 19198c2ecf20Sopenharmony_ci return; 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci drbd_bm_write_lazy(device, 0); 19228c2ecf20Sopenharmony_ci if (resync_done && is_sync_state(device->state.conn)) 19238c2ecf20Sopenharmony_ci drbd_resync_finished(device); 19248c2ecf20Sopenharmony_ci 19258c2ecf20Sopenharmony_ci drbd_bcast_event(device, &sib); 19268c2ecf20Sopenharmony_ci /* update timestamp, in case it took a while to write out stuff */ 19278c2ecf20Sopenharmony_ci device->rs_last_bcast = jiffies; 19288c2ecf20Sopenharmony_ci put_ldev(device); 19298c2ecf20Sopenharmony_ci} 19308c2ecf20Sopenharmony_ci 19318c2ecf20Sopenharmony_cistatic void drbd_ldev_destroy(struct drbd_device *device) 19328c2ecf20Sopenharmony_ci{ 19338c2ecf20Sopenharmony_ci lc_destroy(device->resync); 19348c2ecf20Sopenharmony_ci device->resync = NULL; 19358c2ecf20Sopenharmony_ci lc_destroy(device->act_log); 19368c2ecf20Sopenharmony_ci device->act_log = NULL; 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_ci __acquire(local); 19398c2ecf20Sopenharmony_ci drbd_backing_dev_free(device, device->ldev); 19408c2ecf20Sopenharmony_ci device->ldev = NULL; 19418c2ecf20Sopenharmony_ci __release(local); 19428c2ecf20Sopenharmony_ci 19438c2ecf20Sopenharmony_ci clear_bit(GOING_DISKLESS, &device->flags); 19448c2ecf20Sopenharmony_ci wake_up(&device->misc_wait); 19458c2ecf20Sopenharmony_ci} 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_cistatic void go_diskless(struct drbd_device *device) 19488c2ecf20Sopenharmony_ci{ 19498c2ecf20Sopenharmony_ci D_ASSERT(device, device->state.disk == D_FAILED); 19508c2ecf20Sopenharmony_ci /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will 19518c2ecf20Sopenharmony_ci * inc/dec it frequently. Once we are D_DISKLESS, no one will touch 19528c2ecf20Sopenharmony_ci * the protected members anymore, though, so once put_ldev reaches zero 19538c2ecf20Sopenharmony_ci * again, it will be safe to free them. */ 19548c2ecf20Sopenharmony_ci 19558c2ecf20Sopenharmony_ci /* Try to write changed bitmap pages, read errors may have just 19568c2ecf20Sopenharmony_ci * set some bits outside the area covered by the activity log. 19578c2ecf20Sopenharmony_ci * 19588c2ecf20Sopenharmony_ci * If we have an IO error during the bitmap writeout, 19598c2ecf20Sopenharmony_ci * we will want a full sync next time, just in case. 19608c2ecf20Sopenharmony_ci * (Do we want a specific meta data flag for this?) 19618c2ecf20Sopenharmony_ci * 19628c2ecf20Sopenharmony_ci * If that does not make it to stable storage either, 19638c2ecf20Sopenharmony_ci * we cannot do anything about that anymore. 19648c2ecf20Sopenharmony_ci * 19658c2ecf20Sopenharmony_ci * We still need to check if both bitmap and ldev are present, we may 19668c2ecf20Sopenharmony_ci * end up here after a failed attach, before ldev was even assigned. 19678c2ecf20Sopenharmony_ci */ 19688c2ecf20Sopenharmony_ci if (device->bitmap && device->ldev) { 19698c2ecf20Sopenharmony_ci /* An interrupted resync or similar is allowed to recounts bits 19708c2ecf20Sopenharmony_ci * while we detach. 19718c2ecf20Sopenharmony_ci * Any modifications would not be expected anymore, though. 19728c2ecf20Sopenharmony_ci */ 19738c2ecf20Sopenharmony_ci if (drbd_bitmap_io_from_worker(device, drbd_bm_write, 19748c2ecf20Sopenharmony_ci "detach", BM_LOCKED_TEST_ALLOWED)) { 19758c2ecf20Sopenharmony_ci if (test_bit(WAS_READ_ERROR, &device->flags)) { 19768c2ecf20Sopenharmony_ci drbd_md_set_flag(device, MDF_FULL_SYNC); 19778c2ecf20Sopenharmony_ci drbd_md_sync(device); 19788c2ecf20Sopenharmony_ci } 19798c2ecf20Sopenharmony_ci } 19808c2ecf20Sopenharmony_ci } 19818c2ecf20Sopenharmony_ci 19828c2ecf20Sopenharmony_ci drbd_force_state(device, NS(disk, D_DISKLESS)); 19838c2ecf20Sopenharmony_ci} 19848c2ecf20Sopenharmony_ci 19858c2ecf20Sopenharmony_cistatic int do_md_sync(struct drbd_device *device) 19868c2ecf20Sopenharmony_ci{ 19878c2ecf20Sopenharmony_ci drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); 19888c2ecf20Sopenharmony_ci drbd_md_sync(device); 19898c2ecf20Sopenharmony_ci return 0; 19908c2ecf20Sopenharmony_ci} 19918c2ecf20Sopenharmony_ci 19928c2ecf20Sopenharmony_ci/* only called from drbd_worker thread, no locking */ 19938c2ecf20Sopenharmony_civoid __update_timing_details( 19948c2ecf20Sopenharmony_ci struct drbd_thread_timing_details *tdp, 19958c2ecf20Sopenharmony_ci unsigned int *cb_nr, 19968c2ecf20Sopenharmony_ci void *cb, 19978c2ecf20Sopenharmony_ci const char *fn, const unsigned int line) 19988c2ecf20Sopenharmony_ci{ 19998c2ecf20Sopenharmony_ci unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST; 20008c2ecf20Sopenharmony_ci struct drbd_thread_timing_details *td = tdp + i; 20018c2ecf20Sopenharmony_ci 20028c2ecf20Sopenharmony_ci td->start_jif = jiffies; 20038c2ecf20Sopenharmony_ci td->cb_addr = cb; 20048c2ecf20Sopenharmony_ci td->caller_fn = fn; 20058c2ecf20Sopenharmony_ci td->line = line; 20068c2ecf20Sopenharmony_ci td->cb_nr = *cb_nr; 20078c2ecf20Sopenharmony_ci 20088c2ecf20Sopenharmony_ci i = (i+1) % DRBD_THREAD_DETAILS_HIST; 20098c2ecf20Sopenharmony_ci td = tdp + i; 20108c2ecf20Sopenharmony_ci memset(td, 0, sizeof(*td)); 20118c2ecf20Sopenharmony_ci 20128c2ecf20Sopenharmony_ci ++(*cb_nr); 20138c2ecf20Sopenharmony_ci} 20148c2ecf20Sopenharmony_ci 20158c2ecf20Sopenharmony_cistatic void do_device_work(struct drbd_device *device, const unsigned long todo) 20168c2ecf20Sopenharmony_ci{ 20178c2ecf20Sopenharmony_ci if (test_bit(MD_SYNC, &todo)) 20188c2ecf20Sopenharmony_ci do_md_sync(device); 20198c2ecf20Sopenharmony_ci if (test_bit(RS_DONE, &todo) || 20208c2ecf20Sopenharmony_ci test_bit(RS_PROGRESS, &todo)) 20218c2ecf20Sopenharmony_ci update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); 20228c2ecf20Sopenharmony_ci if (test_bit(GO_DISKLESS, &todo)) 20238c2ecf20Sopenharmony_ci go_diskless(device); 20248c2ecf20Sopenharmony_ci if (test_bit(DESTROY_DISK, &todo)) 20258c2ecf20Sopenharmony_ci drbd_ldev_destroy(device); 20268c2ecf20Sopenharmony_ci if (test_bit(RS_START, &todo)) 20278c2ecf20Sopenharmony_ci do_start_resync(device); 20288c2ecf20Sopenharmony_ci} 20298c2ecf20Sopenharmony_ci 20308c2ecf20Sopenharmony_ci#define DRBD_DEVICE_WORK_MASK \ 20318c2ecf20Sopenharmony_ci ((1UL << GO_DISKLESS) \ 20328c2ecf20Sopenharmony_ci |(1UL << DESTROY_DISK) \ 20338c2ecf20Sopenharmony_ci |(1UL << MD_SYNC) \ 20348c2ecf20Sopenharmony_ci |(1UL << RS_START) \ 20358c2ecf20Sopenharmony_ci |(1UL << RS_PROGRESS) \ 20368c2ecf20Sopenharmony_ci |(1UL << RS_DONE) \ 20378c2ecf20Sopenharmony_ci ) 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_cistatic unsigned long get_work_bits(unsigned long *flags) 20408c2ecf20Sopenharmony_ci{ 20418c2ecf20Sopenharmony_ci unsigned long old, new; 20428c2ecf20Sopenharmony_ci do { 20438c2ecf20Sopenharmony_ci old = *flags; 20448c2ecf20Sopenharmony_ci new = old & ~DRBD_DEVICE_WORK_MASK; 20458c2ecf20Sopenharmony_ci } while (cmpxchg(flags, old, new) != old); 20468c2ecf20Sopenharmony_ci return old & DRBD_DEVICE_WORK_MASK; 20478c2ecf20Sopenharmony_ci} 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_cistatic void do_unqueued_work(struct drbd_connection *connection) 20508c2ecf20Sopenharmony_ci{ 20518c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 20528c2ecf20Sopenharmony_ci int vnr; 20538c2ecf20Sopenharmony_ci 20548c2ecf20Sopenharmony_ci rcu_read_lock(); 20558c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 20568c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 20578c2ecf20Sopenharmony_ci unsigned long todo = get_work_bits(&device->flags); 20588c2ecf20Sopenharmony_ci if (!todo) 20598c2ecf20Sopenharmony_ci continue; 20608c2ecf20Sopenharmony_ci 20618c2ecf20Sopenharmony_ci kref_get(&device->kref); 20628c2ecf20Sopenharmony_ci rcu_read_unlock(); 20638c2ecf20Sopenharmony_ci do_device_work(device, todo); 20648c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 20658c2ecf20Sopenharmony_ci rcu_read_lock(); 20668c2ecf20Sopenharmony_ci } 20678c2ecf20Sopenharmony_ci rcu_read_unlock(); 20688c2ecf20Sopenharmony_ci} 20698c2ecf20Sopenharmony_ci 20708c2ecf20Sopenharmony_cistatic bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) 20718c2ecf20Sopenharmony_ci{ 20728c2ecf20Sopenharmony_ci spin_lock_irq(&queue->q_lock); 20738c2ecf20Sopenharmony_ci list_splice_tail_init(&queue->q, work_list); 20748c2ecf20Sopenharmony_ci spin_unlock_irq(&queue->q_lock); 20758c2ecf20Sopenharmony_ci return !list_empty(work_list); 20768c2ecf20Sopenharmony_ci} 20778c2ecf20Sopenharmony_ci 20788c2ecf20Sopenharmony_cistatic void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) 20798c2ecf20Sopenharmony_ci{ 20808c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 20818c2ecf20Sopenharmony_ci struct net_conf *nc; 20828c2ecf20Sopenharmony_ci int uncork, cork; 20838c2ecf20Sopenharmony_ci 20848c2ecf20Sopenharmony_ci dequeue_work_batch(&connection->sender_work, work_list); 20858c2ecf20Sopenharmony_ci if (!list_empty(work_list)) 20868c2ecf20Sopenharmony_ci return; 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci /* Still nothing to do? 20898c2ecf20Sopenharmony_ci * Maybe we still need to close the current epoch, 20908c2ecf20Sopenharmony_ci * even if no new requests are queued yet. 20918c2ecf20Sopenharmony_ci * 20928c2ecf20Sopenharmony_ci * Also, poke TCP, just in case. 20938c2ecf20Sopenharmony_ci * Then wait for new work (or signal). */ 20948c2ecf20Sopenharmony_ci rcu_read_lock(); 20958c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 20968c2ecf20Sopenharmony_ci uncork = nc ? nc->tcp_cork : 0; 20978c2ecf20Sopenharmony_ci rcu_read_unlock(); 20988c2ecf20Sopenharmony_ci if (uncork) { 20998c2ecf20Sopenharmony_ci mutex_lock(&connection->data.mutex); 21008c2ecf20Sopenharmony_ci if (connection->data.socket) 21018c2ecf20Sopenharmony_ci tcp_sock_set_cork(connection->data.socket->sk, false); 21028c2ecf20Sopenharmony_ci mutex_unlock(&connection->data.mutex); 21038c2ecf20Sopenharmony_ci } 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_ci for (;;) { 21068c2ecf20Sopenharmony_ci int send_barrier; 21078c2ecf20Sopenharmony_ci prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); 21088c2ecf20Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 21098c2ecf20Sopenharmony_ci spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 21108c2ecf20Sopenharmony_ci if (!list_empty(&connection->sender_work.q)) 21118c2ecf20Sopenharmony_ci list_splice_tail_init(&connection->sender_work.q, work_list); 21128c2ecf20Sopenharmony_ci spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 21138c2ecf20Sopenharmony_ci if (!list_empty(work_list) || signal_pending(current)) { 21148c2ecf20Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 21158c2ecf20Sopenharmony_ci break; 21168c2ecf20Sopenharmony_ci } 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci /* We found nothing new to do, no to-be-communicated request, 21198c2ecf20Sopenharmony_ci * no other work item. We may still need to close the last 21208c2ecf20Sopenharmony_ci * epoch. Next incoming request epoch will be connection -> 21218c2ecf20Sopenharmony_ci * current transfer log epoch number. If that is different 21228c2ecf20Sopenharmony_ci * from the epoch of the last request we communicated, it is 21238c2ecf20Sopenharmony_ci * safe to send the epoch separating barrier now. 21248c2ecf20Sopenharmony_ci */ 21258c2ecf20Sopenharmony_ci send_barrier = 21268c2ecf20Sopenharmony_ci atomic_read(&connection->current_tle_nr) != 21278c2ecf20Sopenharmony_ci connection->send.current_epoch_nr; 21288c2ecf20Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 21298c2ecf20Sopenharmony_ci 21308c2ecf20Sopenharmony_ci if (send_barrier) 21318c2ecf20Sopenharmony_ci maybe_send_barrier(connection, 21328c2ecf20Sopenharmony_ci connection->send.current_epoch_nr + 1); 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_ci if (test_bit(DEVICE_WORK_PENDING, &connection->flags)) 21358c2ecf20Sopenharmony_ci break; 21368c2ecf20Sopenharmony_ci 21378c2ecf20Sopenharmony_ci /* drbd_send() may have called flush_signals() */ 21388c2ecf20Sopenharmony_ci if (get_t_state(&connection->worker) != RUNNING) 21398c2ecf20Sopenharmony_ci break; 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_ci schedule(); 21428c2ecf20Sopenharmony_ci /* may be woken up for other things but new work, too, 21438c2ecf20Sopenharmony_ci * e.g. if the current epoch got closed. 21448c2ecf20Sopenharmony_ci * In which case we send the barrier above. */ 21458c2ecf20Sopenharmony_ci } 21468c2ecf20Sopenharmony_ci finish_wait(&connection->sender_work.q_wait, &wait); 21478c2ecf20Sopenharmony_ci 21488c2ecf20Sopenharmony_ci /* someone may have changed the config while we have been waiting above. */ 21498c2ecf20Sopenharmony_ci rcu_read_lock(); 21508c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 21518c2ecf20Sopenharmony_ci cork = nc ? nc->tcp_cork : 0; 21528c2ecf20Sopenharmony_ci rcu_read_unlock(); 21538c2ecf20Sopenharmony_ci mutex_lock(&connection->data.mutex); 21548c2ecf20Sopenharmony_ci if (connection->data.socket) { 21558c2ecf20Sopenharmony_ci if (cork) 21568c2ecf20Sopenharmony_ci tcp_sock_set_cork(connection->data.socket->sk, true); 21578c2ecf20Sopenharmony_ci else if (!uncork) 21588c2ecf20Sopenharmony_ci tcp_sock_set_cork(connection->data.socket->sk, false); 21598c2ecf20Sopenharmony_ci } 21608c2ecf20Sopenharmony_ci mutex_unlock(&connection->data.mutex); 21618c2ecf20Sopenharmony_ci} 21628c2ecf20Sopenharmony_ci 21638c2ecf20Sopenharmony_ciint drbd_worker(struct drbd_thread *thi) 21648c2ecf20Sopenharmony_ci{ 21658c2ecf20Sopenharmony_ci struct drbd_connection *connection = thi->connection; 21668c2ecf20Sopenharmony_ci struct drbd_work *w = NULL; 21678c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 21688c2ecf20Sopenharmony_ci LIST_HEAD(work_list); 21698c2ecf20Sopenharmony_ci int vnr; 21708c2ecf20Sopenharmony_ci 21718c2ecf20Sopenharmony_ci while (get_t_state(thi) == RUNNING) { 21728c2ecf20Sopenharmony_ci drbd_thread_current_set_cpu(thi); 21738c2ecf20Sopenharmony_ci 21748c2ecf20Sopenharmony_ci if (list_empty(&work_list)) { 21758c2ecf20Sopenharmony_ci update_worker_timing_details(connection, wait_for_work); 21768c2ecf20Sopenharmony_ci wait_for_work(connection, &work_list); 21778c2ecf20Sopenharmony_ci } 21788c2ecf20Sopenharmony_ci 21798c2ecf20Sopenharmony_ci if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { 21808c2ecf20Sopenharmony_ci update_worker_timing_details(connection, do_unqueued_work); 21818c2ecf20Sopenharmony_ci do_unqueued_work(connection); 21828c2ecf20Sopenharmony_ci } 21838c2ecf20Sopenharmony_ci 21848c2ecf20Sopenharmony_ci if (signal_pending(current)) { 21858c2ecf20Sopenharmony_ci flush_signals(current); 21868c2ecf20Sopenharmony_ci if (get_t_state(thi) == RUNNING) { 21878c2ecf20Sopenharmony_ci drbd_warn(connection, "Worker got an unexpected signal\n"); 21888c2ecf20Sopenharmony_ci continue; 21898c2ecf20Sopenharmony_ci } 21908c2ecf20Sopenharmony_ci break; 21918c2ecf20Sopenharmony_ci } 21928c2ecf20Sopenharmony_ci 21938c2ecf20Sopenharmony_ci if (get_t_state(thi) != RUNNING) 21948c2ecf20Sopenharmony_ci break; 21958c2ecf20Sopenharmony_ci 21968c2ecf20Sopenharmony_ci if (!list_empty(&work_list)) { 21978c2ecf20Sopenharmony_ci w = list_first_entry(&work_list, struct drbd_work, list); 21988c2ecf20Sopenharmony_ci list_del_init(&w->list); 21998c2ecf20Sopenharmony_ci update_worker_timing_details(connection, w->cb); 22008c2ecf20Sopenharmony_ci if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) 22018c2ecf20Sopenharmony_ci continue; 22028c2ecf20Sopenharmony_ci if (connection->cstate >= C_WF_REPORT_PARAMS) 22038c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 22048c2ecf20Sopenharmony_ci } 22058c2ecf20Sopenharmony_ci } 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci do { 22088c2ecf20Sopenharmony_ci if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { 22098c2ecf20Sopenharmony_ci update_worker_timing_details(connection, do_unqueued_work); 22108c2ecf20Sopenharmony_ci do_unqueued_work(connection); 22118c2ecf20Sopenharmony_ci } 22128c2ecf20Sopenharmony_ci if (!list_empty(&work_list)) { 22138c2ecf20Sopenharmony_ci w = list_first_entry(&work_list, struct drbd_work, list); 22148c2ecf20Sopenharmony_ci list_del_init(&w->list); 22158c2ecf20Sopenharmony_ci update_worker_timing_details(connection, w->cb); 22168c2ecf20Sopenharmony_ci w->cb(w, 1); 22178c2ecf20Sopenharmony_ci } else 22188c2ecf20Sopenharmony_ci dequeue_work_batch(&connection->sender_work, &work_list); 22198c2ecf20Sopenharmony_ci } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags)); 22208c2ecf20Sopenharmony_ci 22218c2ecf20Sopenharmony_ci rcu_read_lock(); 22228c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 22238c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 22248c2ecf20Sopenharmony_ci D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE); 22258c2ecf20Sopenharmony_ci kref_get(&device->kref); 22268c2ecf20Sopenharmony_ci rcu_read_unlock(); 22278c2ecf20Sopenharmony_ci drbd_device_cleanup(device); 22288c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 22298c2ecf20Sopenharmony_ci rcu_read_lock(); 22308c2ecf20Sopenharmony_ci } 22318c2ecf20Sopenharmony_ci rcu_read_unlock(); 22328c2ecf20Sopenharmony_ci 22338c2ecf20Sopenharmony_ci return 0; 22348c2ecf20Sopenharmony_ci} 2235