162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci drbd_req.c 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 862306a36Sopenharmony_ci Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 962306a36Sopenharmony_ci Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/module.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/slab.h> 1762306a36Sopenharmony_ci#include <linux/drbd.h> 1862306a36Sopenharmony_ci#include "drbd_int.h" 1962306a36Sopenharmony_ci#include "drbd_req.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistatic bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size); 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src) 2562306a36Sopenharmony_ci{ 2662306a36Sopenharmony_ci struct drbd_request *req; 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci req = mempool_alloc(&drbd_request_mempool, GFP_NOIO); 2962306a36Sopenharmony_ci if (!req) 3062306a36Sopenharmony_ci return NULL; 3162306a36Sopenharmony_ci memset(req, 0, sizeof(*req)); 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) 3462306a36Sopenharmony_ci | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0) 3562306a36Sopenharmony_ci | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); 3662306a36Sopenharmony_ci req->device = device; 3762306a36Sopenharmony_ci req->master_bio = bio_src; 3862306a36Sopenharmony_ci req->epoch = 0; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci drbd_clear_interval(&req->i); 4162306a36Sopenharmony_ci req->i.sector = bio_src->bi_iter.bi_sector; 4262306a36Sopenharmony_ci req->i.size = bio_src->bi_iter.bi_size; 4362306a36Sopenharmony_ci req->i.local = true; 4462306a36Sopenharmony_ci req->i.waiting = false; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci INIT_LIST_HEAD(&req->tl_requests); 4762306a36Sopenharmony_ci INIT_LIST_HEAD(&req->w.list); 4862306a36Sopenharmony_ci INIT_LIST_HEAD(&req->req_pending_master_completion); 4962306a36Sopenharmony_ci INIT_LIST_HEAD(&req->req_pending_local); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci /* one reference to be put by __drbd_make_request */ 5262306a36Sopenharmony_ci atomic_set(&req->completion_ref, 1); 5362306a36Sopenharmony_ci /* one kref as long as completion_ref > 0 */ 5462306a36Sopenharmony_ci kref_init(&req->kref); 5562306a36Sopenharmony_ci return req; 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic void drbd_remove_request_interval(struct rb_root *root, 5962306a36Sopenharmony_ci struct drbd_request *req) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci struct drbd_device *device = req->device; 6262306a36Sopenharmony_ci struct drbd_interval *i = &req->i; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci drbd_remove_interval(root, i); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci /* Wake up any processes waiting for this request to complete. */ 6762306a36Sopenharmony_ci if (i->waiting) 6862306a36Sopenharmony_ci wake_up(&device->misc_wait); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_civoid drbd_req_destroy(struct kref *kref) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci struct drbd_request *req = container_of(kref, struct drbd_request, kref); 7462306a36Sopenharmony_ci struct drbd_device *device = req->device; 7562306a36Sopenharmony_ci const unsigned s = req->rq_state; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci if ((req->master_bio && !(s & RQ_POSTPONED)) || 7862306a36Sopenharmony_ci atomic_read(&req->completion_ref) || 7962306a36Sopenharmony_ci (s & RQ_LOCAL_PENDING) || 8062306a36Sopenharmony_ci ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) { 8162306a36Sopenharmony_ci drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n", 8262306a36Sopenharmony_ci s, atomic_read(&req->completion_ref)); 8362306a36Sopenharmony_ci return; 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci /* If called from mod_rq_state (expected normal case) or 8762306a36Sopenharmony_ci * drbd_send_and_submit (the less likely normal path), this holds the 8862306a36Sopenharmony_ci * req_lock, and req->tl_requests will typicaly be on ->transfer_log, 8962306a36Sopenharmony_ci * though it may be still empty (never added to the transfer log). 9062306a36Sopenharmony_ci * 9162306a36Sopenharmony_ci * If called from do_retry(), we do NOT hold the req_lock, but we are 9262306a36Sopenharmony_ci * still allowed to unconditionally list_del(&req->tl_requests), 9362306a36Sopenharmony_ci * because it will be on a local on-stack list only. */ 9462306a36Sopenharmony_ci list_del_init(&req->tl_requests); 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci /* finally remove the request from the conflict detection 9762306a36Sopenharmony_ci * respective block_id verification interval tree. */ 9862306a36Sopenharmony_ci if (!drbd_interval_empty(&req->i)) { 9962306a36Sopenharmony_ci struct rb_root *root; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci if (s & RQ_WRITE) 10262306a36Sopenharmony_ci root = &device->write_requests; 10362306a36Sopenharmony_ci else 10462306a36Sopenharmony_ci root = &device->read_requests; 10562306a36Sopenharmony_ci drbd_remove_request_interval(root, req); 10662306a36Sopenharmony_ci } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0) 10762306a36Sopenharmony_ci drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n", 10862306a36Sopenharmony_ci s, (unsigned long long)req->i.sector, req->i.size); 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci /* if it was a write, we may have to set the corresponding 11162306a36Sopenharmony_ci * bit(s) out-of-sync first. If it had a local part, we need to 11262306a36Sopenharmony_ci * release the reference to the activity log. */ 11362306a36Sopenharmony_ci if (s & RQ_WRITE) { 11462306a36Sopenharmony_ci /* Set out-of-sync unless both OK flags are set 11562306a36Sopenharmony_ci * (local only or remote failed). 11662306a36Sopenharmony_ci * Other places where we set out-of-sync: 11762306a36Sopenharmony_ci * READ with local io-error */ 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci /* There is a special case: 12062306a36Sopenharmony_ci * we may notice late that IO was suspended, 12162306a36Sopenharmony_ci * and postpone, or schedule for retry, a write, 12262306a36Sopenharmony_ci * before it even was submitted or sent. 12362306a36Sopenharmony_ci * In that case we do not want to touch the bitmap at all. 12462306a36Sopenharmony_ci */ 12562306a36Sopenharmony_ci struct drbd_peer_device *peer_device = first_peer_device(device); 12662306a36Sopenharmony_ci if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { 12762306a36Sopenharmony_ci if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) 12862306a36Sopenharmony_ci drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) 13162306a36Sopenharmony_ci drbd_set_in_sync(peer_device, req->i.sector, req->i.size); 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci /* one might be tempted to move the drbd_al_complete_io 13562306a36Sopenharmony_ci * to the local io completion callback drbd_request_endio. 13662306a36Sopenharmony_ci * but, if this was a mirror write, we may only 13762306a36Sopenharmony_ci * drbd_al_complete_io after this is RQ_NET_DONE, 13862306a36Sopenharmony_ci * otherwise the extent could be dropped from the al 13962306a36Sopenharmony_ci * before it has actually been written on the peer. 14062306a36Sopenharmony_ci * if we crash before our peer knows about the request, 14162306a36Sopenharmony_ci * but after the extent has been dropped from the al, 14262306a36Sopenharmony_ci * we would forget to resync the corresponding extent. 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_ci if (s & RQ_IN_ACT_LOG) { 14562306a36Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { 14662306a36Sopenharmony_ci drbd_al_complete_io(device, &req->i); 14762306a36Sopenharmony_ci put_ldev(device); 14862306a36Sopenharmony_ci } else if (drbd_ratelimit()) { 14962306a36Sopenharmony_ci drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), " 15062306a36Sopenharmony_ci "but my Disk seems to have failed :(\n", 15162306a36Sopenharmony_ci (unsigned long long) req->i.sector, req->i.size); 15262306a36Sopenharmony_ci } 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci } 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci mempool_free(req, &drbd_request_mempool); 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_cistatic void wake_all_senders(struct drbd_connection *connection) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci wake_up(&connection->sender_work.q_wait); 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci/* must hold resource->req_lock */ 16562306a36Sopenharmony_civoid start_new_tl_epoch(struct drbd_connection *connection) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci /* no point closing an epoch, if it is empty, anyways. */ 16862306a36Sopenharmony_ci if (connection->current_tle_writes == 0) 16962306a36Sopenharmony_ci return; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci connection->current_tle_writes = 0; 17262306a36Sopenharmony_ci atomic_inc(&connection->current_tle_nr); 17362306a36Sopenharmony_ci wake_all_senders(connection); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_civoid complete_master_bio(struct drbd_device *device, 17762306a36Sopenharmony_ci struct bio_and_error *m) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci if (unlikely(m->error)) 18062306a36Sopenharmony_ci m->bio->bi_status = errno_to_blk_status(m->error); 18162306a36Sopenharmony_ci bio_endio(m->bio); 18262306a36Sopenharmony_ci dec_ap_bio(device); 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci/* Helper for __req_mod(). 18762306a36Sopenharmony_ci * Set m->bio to the master bio, if it is fit to be completed, 18862306a36Sopenharmony_ci * or leave it alone (it is initialized to NULL in __req_mod), 18962306a36Sopenharmony_ci * if it has already been completed, or cannot be completed yet. 19062306a36Sopenharmony_ci * If m->bio is set, the error status to be returned is placed in m->error. 19162306a36Sopenharmony_ci */ 19262306a36Sopenharmony_cistatic 19362306a36Sopenharmony_civoid drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci const unsigned s = req->rq_state; 19662306a36Sopenharmony_ci struct drbd_device *device = req->device; 19762306a36Sopenharmony_ci int error, ok; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci /* we must not complete the master bio, while it is 20062306a36Sopenharmony_ci * still being processed by _drbd_send_zc_bio (drbd_send_dblock) 20162306a36Sopenharmony_ci * not yet acknowledged by the peer 20262306a36Sopenharmony_ci * not yet completed by the local io subsystem 20362306a36Sopenharmony_ci * these flags may get cleared in any order by 20462306a36Sopenharmony_ci * the worker, 20562306a36Sopenharmony_ci * the receiver, 20662306a36Sopenharmony_ci * the bio_endio completion callbacks. 20762306a36Sopenharmony_ci */ 20862306a36Sopenharmony_ci if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) || 20962306a36Sopenharmony_ci (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) || 21062306a36Sopenharmony_ci (s & RQ_COMPLETION_SUSP)) { 21162306a36Sopenharmony_ci drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s); 21262306a36Sopenharmony_ci return; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (!req->master_bio) { 21662306a36Sopenharmony_ci drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n"); 21762306a36Sopenharmony_ci return; 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci /* 22162306a36Sopenharmony_ci * figure out whether to report success or failure. 22262306a36Sopenharmony_ci * 22362306a36Sopenharmony_ci * report success when at least one of the operations succeeded. 22462306a36Sopenharmony_ci * or, to put the other way, 22562306a36Sopenharmony_ci * only report failure, when both operations failed. 22662306a36Sopenharmony_ci * 22762306a36Sopenharmony_ci * what to do about the failures is handled elsewhere. 22862306a36Sopenharmony_ci * what we need to do here is just: complete the master_bio. 22962306a36Sopenharmony_ci * 23062306a36Sopenharmony_ci * local completion error, if any, has been stored as ERR_PTR 23162306a36Sopenharmony_ci * in private_bio within drbd_request_endio. 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_ci ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); 23462306a36Sopenharmony_ci error = PTR_ERR(req->private_bio); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci /* Before we can signal completion to the upper layers, 23762306a36Sopenharmony_ci * we may need to close the current transfer log epoch. 23862306a36Sopenharmony_ci * We are within the request lock, so we can simply compare 23962306a36Sopenharmony_ci * the request epoch number with the current transfer log 24062306a36Sopenharmony_ci * epoch number. If they match, increase the current_tle_nr, 24162306a36Sopenharmony_ci * and reset the transfer log epoch write_cnt. 24262306a36Sopenharmony_ci */ 24362306a36Sopenharmony_ci if (op_is_write(bio_op(req->master_bio)) && 24462306a36Sopenharmony_ci req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) 24562306a36Sopenharmony_ci start_new_tl_epoch(first_peer_device(device)->connection); 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci /* Update disk stats */ 24862306a36Sopenharmony_ci bio_end_io_acct(req->master_bio, req->start_jif); 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci /* If READ failed, 25162306a36Sopenharmony_ci * have it be pushed back to the retry work queue, 25262306a36Sopenharmony_ci * so it will re-enter __drbd_make_request(), 25362306a36Sopenharmony_ci * and be re-assigned to a suitable local or remote path, 25462306a36Sopenharmony_ci * or failed if we do not have access to good data anymore. 25562306a36Sopenharmony_ci * 25662306a36Sopenharmony_ci * Unless it was failed early by __drbd_make_request(), 25762306a36Sopenharmony_ci * because no path was available, in which case 25862306a36Sopenharmony_ci * it was not even added to the transfer_log. 25962306a36Sopenharmony_ci * 26062306a36Sopenharmony_ci * read-ahead may fail, and will not be retried. 26162306a36Sopenharmony_ci * 26262306a36Sopenharmony_ci * WRITE should have used all available paths already. 26362306a36Sopenharmony_ci */ 26462306a36Sopenharmony_ci if (!ok && 26562306a36Sopenharmony_ci bio_op(req->master_bio) == REQ_OP_READ && 26662306a36Sopenharmony_ci !(req->master_bio->bi_opf & REQ_RAHEAD) && 26762306a36Sopenharmony_ci !list_empty(&req->tl_requests)) 26862306a36Sopenharmony_ci req->rq_state |= RQ_POSTPONED; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci if (!(req->rq_state & RQ_POSTPONED)) { 27162306a36Sopenharmony_ci m->error = ok ? 0 : (error ?: -EIO); 27262306a36Sopenharmony_ci m->bio = req->master_bio; 27362306a36Sopenharmony_ci req->master_bio = NULL; 27462306a36Sopenharmony_ci /* We leave it in the tree, to be able to verify later 27562306a36Sopenharmony_ci * write-acks in protocol != C during resync. 27662306a36Sopenharmony_ci * But we mark it as "complete", so it won't be counted as 27762306a36Sopenharmony_ci * conflict in a multi-primary setup. */ 27862306a36Sopenharmony_ci req->i.completed = true; 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (req->i.waiting) 28262306a36Sopenharmony_ci wake_up(&device->misc_wait); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci /* Either we are about to complete to upper layers, 28562306a36Sopenharmony_ci * or we will restart this request. 28662306a36Sopenharmony_ci * In either case, the request object will be destroyed soon, 28762306a36Sopenharmony_ci * so better remove it from all lists. */ 28862306a36Sopenharmony_ci list_del_init(&req->req_pending_master_completion); 28962306a36Sopenharmony_ci} 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci/* still holds resource->req_lock */ 29262306a36Sopenharmony_cistatic void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci struct drbd_device *device = req->device; 29562306a36Sopenharmony_ci D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (!put) 29862306a36Sopenharmony_ci return; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci if (!atomic_sub_and_test(put, &req->completion_ref)) 30162306a36Sopenharmony_ci return; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci drbd_req_complete(req, m); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci /* local completion may still come in later, 30662306a36Sopenharmony_ci * we need to keep the req object around. */ 30762306a36Sopenharmony_ci if (req->rq_state & RQ_LOCAL_ABORTED) 30862306a36Sopenharmony_ci return; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci if (req->rq_state & RQ_POSTPONED) { 31162306a36Sopenharmony_ci /* don't destroy the req object just yet, 31262306a36Sopenharmony_ci * but queue it for retry */ 31362306a36Sopenharmony_ci drbd_restart_request(req); 31462306a36Sopenharmony_ci return; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_cistatic void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 32362306a36Sopenharmony_ci if (!connection) 32462306a36Sopenharmony_ci return; 32562306a36Sopenharmony_ci if (connection->req_next == NULL) 32662306a36Sopenharmony_ci connection->req_next = req; 32762306a36Sopenharmony_ci} 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_cistatic void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 33262306a36Sopenharmony_ci struct drbd_request *iter = req; 33362306a36Sopenharmony_ci if (!connection) 33462306a36Sopenharmony_ci return; 33562306a36Sopenharmony_ci if (connection->req_next != req) 33662306a36Sopenharmony_ci return; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci req = NULL; 33962306a36Sopenharmony_ci list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { 34062306a36Sopenharmony_ci const unsigned int s = iter->rq_state; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci if (s & RQ_NET_QUEUED) { 34362306a36Sopenharmony_ci req = iter; 34462306a36Sopenharmony_ci break; 34562306a36Sopenharmony_ci } 34662306a36Sopenharmony_ci } 34762306a36Sopenharmony_ci connection->req_next = req; 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cistatic void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 35362306a36Sopenharmony_ci if (!connection) 35462306a36Sopenharmony_ci return; 35562306a36Sopenharmony_ci if (connection->req_ack_pending == NULL) 35662306a36Sopenharmony_ci connection->req_ack_pending = req; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_cistatic void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 36262306a36Sopenharmony_ci struct drbd_request *iter = req; 36362306a36Sopenharmony_ci if (!connection) 36462306a36Sopenharmony_ci return; 36562306a36Sopenharmony_ci if (connection->req_ack_pending != req) 36662306a36Sopenharmony_ci return; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci req = NULL; 36962306a36Sopenharmony_ci list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { 37062306a36Sopenharmony_ci const unsigned int s = iter->rq_state; 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) { 37362306a36Sopenharmony_ci req = iter; 37462306a36Sopenharmony_ci break; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci } 37762306a36Sopenharmony_ci connection->req_ack_pending = req; 37862306a36Sopenharmony_ci} 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_cistatic void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) 38162306a36Sopenharmony_ci{ 38262306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 38362306a36Sopenharmony_ci if (!connection) 38462306a36Sopenharmony_ci return; 38562306a36Sopenharmony_ci if (connection->req_not_net_done == NULL) 38662306a36Sopenharmony_ci connection->req_not_net_done = req; 38762306a36Sopenharmony_ci} 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_cistatic void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) 39062306a36Sopenharmony_ci{ 39162306a36Sopenharmony_ci struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 39262306a36Sopenharmony_ci struct drbd_request *iter = req; 39362306a36Sopenharmony_ci if (!connection) 39462306a36Sopenharmony_ci return; 39562306a36Sopenharmony_ci if (connection->req_not_net_done != req) 39662306a36Sopenharmony_ci return; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci req = NULL; 39962306a36Sopenharmony_ci list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { 40062306a36Sopenharmony_ci const unsigned int s = iter->rq_state; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) { 40362306a36Sopenharmony_ci req = iter; 40462306a36Sopenharmony_ci break; 40562306a36Sopenharmony_ci } 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ci connection->req_not_net_done = req; 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci/* I'd like this to be the only place that manipulates 41162306a36Sopenharmony_ci * req->completion_ref and req->kref. */ 41262306a36Sopenharmony_cistatic void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, 41362306a36Sopenharmony_ci int clear, int set) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci struct drbd_device *device = req->device; 41662306a36Sopenharmony_ci struct drbd_peer_device *peer_device = first_peer_device(device); 41762306a36Sopenharmony_ci unsigned s = req->rq_state; 41862306a36Sopenharmony_ci int c_put = 0; 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP)) 42162306a36Sopenharmony_ci set |= RQ_COMPLETION_SUSP; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci /* apply */ 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci req->rq_state &= ~clear; 42662306a36Sopenharmony_ci req->rq_state |= set; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci /* no change? */ 42962306a36Sopenharmony_ci if (req->rq_state == s) 43062306a36Sopenharmony_ci return; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci /* intent: get references */ 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci kref_get(&req->kref); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING)) 43762306a36Sopenharmony_ci atomic_inc(&req->completion_ref); 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) { 44062306a36Sopenharmony_ci inc_ap_pending(device); 44162306a36Sopenharmony_ci atomic_inc(&req->completion_ref); 44262306a36Sopenharmony_ci } 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) { 44562306a36Sopenharmony_ci atomic_inc(&req->completion_ref); 44662306a36Sopenharmony_ci set_if_null_req_next(peer_device, req); 44762306a36Sopenharmony_ci } 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) 45062306a36Sopenharmony_ci kref_get(&req->kref); /* wait for the DONE */ 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { 45362306a36Sopenharmony_ci /* potentially already completed in the ack_receiver thread */ 45462306a36Sopenharmony_ci if (!(s & RQ_NET_DONE)) { 45562306a36Sopenharmony_ci atomic_add(req->i.size >> 9, &device->ap_in_flight); 45662306a36Sopenharmony_ci set_if_null_req_not_net_done(peer_device, req); 45762306a36Sopenharmony_ci } 45862306a36Sopenharmony_ci if (req->rq_state & RQ_NET_PENDING) 45962306a36Sopenharmony_ci set_if_null_req_ack_pending(peer_device, req); 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) 46362306a36Sopenharmony_ci atomic_inc(&req->completion_ref); 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci /* progress: put references */ 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP)) 46862306a36Sopenharmony_ci ++c_put; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { 47162306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING); 47262306a36Sopenharmony_ci ++c_put; 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) { 47662306a36Sopenharmony_ci if (req->rq_state & RQ_LOCAL_ABORTED) 47762306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 47862306a36Sopenharmony_ci else 47962306a36Sopenharmony_ci ++c_put; 48062306a36Sopenharmony_ci list_del_init(&req->req_pending_local); 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { 48462306a36Sopenharmony_ci dec_ap_pending(device); 48562306a36Sopenharmony_ci ++c_put; 48662306a36Sopenharmony_ci req->acked_jif = jiffies; 48762306a36Sopenharmony_ci advance_conn_req_ack_pending(peer_device, req); 48862306a36Sopenharmony_ci } 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) { 49162306a36Sopenharmony_ci ++c_put; 49262306a36Sopenharmony_ci advance_conn_req_next(peer_device, req); 49362306a36Sopenharmony_ci } 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { 49662306a36Sopenharmony_ci if (s & RQ_NET_SENT) 49762306a36Sopenharmony_ci atomic_sub(req->i.size >> 9, &device->ap_in_flight); 49862306a36Sopenharmony_ci if (s & RQ_EXP_BARR_ACK) 49962306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 50062306a36Sopenharmony_ci req->net_done_jif = jiffies; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci /* in ahead/behind mode, or just in case, 50362306a36Sopenharmony_ci * before we finally destroy this request, 50462306a36Sopenharmony_ci * the caching pointers must not reference it anymore */ 50562306a36Sopenharmony_ci advance_conn_req_next(peer_device, req); 50662306a36Sopenharmony_ci advance_conn_req_ack_pending(peer_device, req); 50762306a36Sopenharmony_ci advance_conn_req_not_net_done(peer_device, req); 50862306a36Sopenharmony_ci } 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci /* potentially complete and destroy */ 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci /* If we made progress, retry conflicting peer requests, if any. */ 51362306a36Sopenharmony_ci if (req->i.waiting) 51462306a36Sopenharmony_ci wake_up(&device->misc_wait); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci drbd_req_put_completion_ref(req, m, c_put); 51762306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_cistatic void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) 52162306a36Sopenharmony_ci{ 52262306a36Sopenharmony_ci if (!drbd_ratelimit()) 52362306a36Sopenharmony_ci return; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n", 52662306a36Sopenharmony_ci (req->rq_state & RQ_WRITE) ? "WRITE" : "READ", 52762306a36Sopenharmony_ci (unsigned long long)req->i.sector, 52862306a36Sopenharmony_ci req->i.size >> 9, 52962306a36Sopenharmony_ci device->ldev->backing_bdev); 53062306a36Sopenharmony_ci} 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci/* Helper for HANDED_OVER_TO_NETWORK. 53362306a36Sopenharmony_ci * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)? 53462306a36Sopenharmony_ci * Is it also still "PENDING"? 53562306a36Sopenharmony_ci * --> If so, clear PENDING and set NET_OK below. 53662306a36Sopenharmony_ci * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster 53762306a36Sopenharmony_ci * (and we must not set RQ_NET_OK) */ 53862306a36Sopenharmony_cistatic inline bool is_pending_write_protocol_A(struct drbd_request *req) 53962306a36Sopenharmony_ci{ 54062306a36Sopenharmony_ci return (req->rq_state & 54162306a36Sopenharmony_ci (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK)) 54262306a36Sopenharmony_ci == (RQ_WRITE|RQ_NET_PENDING); 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci/* obviously this could be coded as many single functions 54662306a36Sopenharmony_ci * instead of one huge switch, 54762306a36Sopenharmony_ci * or by putting the code directly in the respective locations 54862306a36Sopenharmony_ci * (as it has been before). 54962306a36Sopenharmony_ci * 55062306a36Sopenharmony_ci * but having it this way 55162306a36Sopenharmony_ci * enforces that it is all in this one place, where it is easier to audit, 55262306a36Sopenharmony_ci * it makes it obvious that whatever "event" "happens" to a request should 55362306a36Sopenharmony_ci * happen "atomically" within the req_lock, 55462306a36Sopenharmony_ci * and it enforces that we have to think in a very structured manner 55562306a36Sopenharmony_ci * about the "events" that may happen to a request during its life time ... 55662306a36Sopenharmony_ci * 55762306a36Sopenharmony_ci * 55862306a36Sopenharmony_ci * peer_device == NULL means local disk 55962306a36Sopenharmony_ci */ 56062306a36Sopenharmony_ciint __req_mod(struct drbd_request *req, enum drbd_req_event what, 56162306a36Sopenharmony_ci struct drbd_peer_device *peer_device, 56262306a36Sopenharmony_ci struct bio_and_error *m) 56362306a36Sopenharmony_ci{ 56462306a36Sopenharmony_ci struct drbd_device *const device = req->device; 56562306a36Sopenharmony_ci struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; 56662306a36Sopenharmony_ci struct net_conf *nc; 56762306a36Sopenharmony_ci int p, rv = 0; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci if (m) 57062306a36Sopenharmony_ci m->bio = NULL; 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci switch (what) { 57362306a36Sopenharmony_ci default: 57462306a36Sopenharmony_ci drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); 57562306a36Sopenharmony_ci break; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* does not happen... 57862306a36Sopenharmony_ci * initialization done in drbd_req_new 57962306a36Sopenharmony_ci case CREATED: 58062306a36Sopenharmony_ci break; 58162306a36Sopenharmony_ci */ 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci case TO_BE_SENT: /* via network */ 58462306a36Sopenharmony_ci /* reached via __drbd_make_request 58562306a36Sopenharmony_ci * and from w_read_retry_remote */ 58662306a36Sopenharmony_ci D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); 58762306a36Sopenharmony_ci rcu_read_lock(); 58862306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 58962306a36Sopenharmony_ci p = nc->wire_protocol; 59062306a36Sopenharmony_ci rcu_read_unlock(); 59162306a36Sopenharmony_ci req->rq_state |= 59262306a36Sopenharmony_ci p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK : 59362306a36Sopenharmony_ci p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0; 59462306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_NET_PENDING); 59562306a36Sopenharmony_ci break; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci case TO_BE_SUBMITTED: /* locally */ 59862306a36Sopenharmony_ci /* reached via __drbd_make_request */ 59962306a36Sopenharmony_ci D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK)); 60062306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_LOCAL_PENDING); 60162306a36Sopenharmony_ci break; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci case COMPLETED_OK: 60462306a36Sopenharmony_ci if (req->rq_state & RQ_WRITE) 60562306a36Sopenharmony_ci device->writ_cnt += req->i.size >> 9; 60662306a36Sopenharmony_ci else 60762306a36Sopenharmony_ci device->read_cnt += req->i.size >> 9; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci mod_rq_state(req, m, RQ_LOCAL_PENDING, 61062306a36Sopenharmony_ci RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); 61162306a36Sopenharmony_ci break; 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci case ABORT_DISK_IO: 61462306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); 61562306a36Sopenharmony_ci break; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci case WRITE_COMPLETED_WITH_ERROR: 61862306a36Sopenharmony_ci drbd_report_io_error(device, req); 61962306a36Sopenharmony_ci __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 62062306a36Sopenharmony_ci mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); 62162306a36Sopenharmony_ci break; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci case READ_COMPLETED_WITH_ERROR: 62462306a36Sopenharmony_ci drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); 62562306a36Sopenharmony_ci drbd_report_io_error(device, req); 62662306a36Sopenharmony_ci __drbd_chk_io_error(device, DRBD_READ_ERROR); 62762306a36Sopenharmony_ci fallthrough; 62862306a36Sopenharmony_ci case READ_AHEAD_COMPLETED_WITH_ERROR: 62962306a36Sopenharmony_ci /* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */ 63062306a36Sopenharmony_ci mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); 63162306a36Sopenharmony_ci break; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci case DISCARD_COMPLETED_NOTSUPP: 63462306a36Sopenharmony_ci case DISCARD_COMPLETED_WITH_ERROR: 63562306a36Sopenharmony_ci /* I'd rather not detach from local disk just because it 63662306a36Sopenharmony_ci * failed a REQ_OP_DISCARD. */ 63762306a36Sopenharmony_ci mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); 63862306a36Sopenharmony_ci break; 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci case QUEUE_FOR_NET_READ: 64162306a36Sopenharmony_ci /* READ, and 64262306a36Sopenharmony_ci * no local disk, 64362306a36Sopenharmony_ci * or target area marked as invalid, 64462306a36Sopenharmony_ci * or just got an io-error. */ 64562306a36Sopenharmony_ci /* from __drbd_make_request 64662306a36Sopenharmony_ci * or from bio_endio during read io-error recovery */ 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci /* So we can verify the handle in the answer packet. 64962306a36Sopenharmony_ci * Corresponding drbd_remove_request_interval is in 65062306a36Sopenharmony_ci * drbd_req_complete() */ 65162306a36Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&req->i)); 65262306a36Sopenharmony_ci drbd_insert_interval(&device->read_requests, &req->i); 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci set_bit(UNPLUG_REMOTE, &device->flags); 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 65762306a36Sopenharmony_ci D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); 65862306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_NET_QUEUED); 65962306a36Sopenharmony_ci req->w.cb = w_send_read_req; 66062306a36Sopenharmony_ci drbd_queue_work(&connection->sender_work, 66162306a36Sopenharmony_ci &req->w); 66262306a36Sopenharmony_ci break; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci case QUEUE_FOR_NET_WRITE: 66562306a36Sopenharmony_ci /* assert something? */ 66662306a36Sopenharmony_ci /* from __drbd_make_request only */ 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci /* Corresponding drbd_remove_request_interval is in 66962306a36Sopenharmony_ci * drbd_req_complete() */ 67062306a36Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&req->i)); 67162306a36Sopenharmony_ci drbd_insert_interval(&device->write_requests, &req->i); 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci /* NOTE 67462306a36Sopenharmony_ci * In case the req ended up on the transfer log before being 67562306a36Sopenharmony_ci * queued on the worker, it could lead to this request being 67662306a36Sopenharmony_ci * missed during cleanup after connection loss. 67762306a36Sopenharmony_ci * So we have to do both operations here, 67862306a36Sopenharmony_ci * within the same lock that protects the transfer log. 67962306a36Sopenharmony_ci * 68062306a36Sopenharmony_ci * _req_add_to_epoch(req); this has to be after the 68162306a36Sopenharmony_ci * _maybe_start_new_epoch(req); which happened in 68262306a36Sopenharmony_ci * __drbd_make_request, because we now may set the bit 68362306a36Sopenharmony_ci * again ourselves to close the current epoch. 68462306a36Sopenharmony_ci * 68562306a36Sopenharmony_ci * Add req to the (now) current epoch (barrier). */ 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci /* otherwise we may lose an unplug, which may cause some remote 68862306a36Sopenharmony_ci * io-scheduler timeout to expire, increasing maximum latency, 68962306a36Sopenharmony_ci * hurting performance. */ 69062306a36Sopenharmony_ci set_bit(UNPLUG_REMOTE, &device->flags); 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci /* queue work item to send data */ 69362306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 69462306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); 69562306a36Sopenharmony_ci req->w.cb = w_send_dblock; 69662306a36Sopenharmony_ci drbd_queue_work(&connection->sender_work, 69762306a36Sopenharmony_ci &req->w); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci /* close the epoch, in case it outgrew the limit */ 70062306a36Sopenharmony_ci rcu_read_lock(); 70162306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 70262306a36Sopenharmony_ci p = nc->max_epoch_size; 70362306a36Sopenharmony_ci rcu_read_unlock(); 70462306a36Sopenharmony_ci if (connection->current_tle_writes >= p) 70562306a36Sopenharmony_ci start_new_tl_epoch(connection); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci break; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci case QUEUE_FOR_SEND_OOS: 71062306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_NET_QUEUED); 71162306a36Sopenharmony_ci req->w.cb = w_send_out_of_sync; 71262306a36Sopenharmony_ci drbd_queue_work(&connection->sender_work, 71362306a36Sopenharmony_ci &req->w); 71462306a36Sopenharmony_ci break; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci case READ_RETRY_REMOTE_CANCELED: 71762306a36Sopenharmony_ci case SEND_CANCELED: 71862306a36Sopenharmony_ci case SEND_FAILED: 71962306a36Sopenharmony_ci /* real cleanup will be done from tl_clear. just update flags 72062306a36Sopenharmony_ci * so it is no longer marked as on the worker queue */ 72162306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_QUEUED, 0); 72262306a36Sopenharmony_ci break; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci case HANDED_OVER_TO_NETWORK: 72562306a36Sopenharmony_ci /* assert something? */ 72662306a36Sopenharmony_ci if (is_pending_write_protocol_A(req)) 72762306a36Sopenharmony_ci /* this is what is dangerous about protocol A: 72862306a36Sopenharmony_ci * pretend it was successfully written on the peer. */ 72962306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING, 73062306a36Sopenharmony_ci RQ_NET_SENT|RQ_NET_OK); 73162306a36Sopenharmony_ci else 73262306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); 73362306a36Sopenharmony_ci /* It is still not yet RQ_NET_DONE until the 73462306a36Sopenharmony_ci * corresponding epoch barrier got acked as well, 73562306a36Sopenharmony_ci * so we know what to dirty on connection loss. */ 73662306a36Sopenharmony_ci break; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci case OOS_HANDED_TO_NETWORK: 73962306a36Sopenharmony_ci /* Was not set PENDING, no longer QUEUED, so is now DONE 74062306a36Sopenharmony_ci * as far as this connection is concerned. */ 74162306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE); 74262306a36Sopenharmony_ci break; 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci case CONNECTION_LOST_WHILE_PENDING: 74562306a36Sopenharmony_ci /* transfer log cleanup after connection loss */ 74662306a36Sopenharmony_ci mod_rq_state(req, m, 74762306a36Sopenharmony_ci RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP, 74862306a36Sopenharmony_ci RQ_NET_DONE); 74962306a36Sopenharmony_ci break; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci case CONFLICT_RESOLVED: 75262306a36Sopenharmony_ci /* for superseded conflicting writes of multiple primaries, 75362306a36Sopenharmony_ci * there is no need to keep anything in the tl, potential 75462306a36Sopenharmony_ci * node crashes are covered by the activity log. 75562306a36Sopenharmony_ci * 75662306a36Sopenharmony_ci * If this request had been marked as RQ_POSTPONED before, 75762306a36Sopenharmony_ci * it will actually not be completed, but "restarted", 75862306a36Sopenharmony_ci * resubmitted from the retry worker context. */ 75962306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 76062306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); 76162306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK); 76262306a36Sopenharmony_ci break; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci case WRITE_ACKED_BY_PEER_AND_SIS: 76562306a36Sopenharmony_ci req->rq_state |= RQ_NET_SIS; 76662306a36Sopenharmony_ci fallthrough; 76762306a36Sopenharmony_ci case WRITE_ACKED_BY_PEER: 76862306a36Sopenharmony_ci /* Normal operation protocol C: successfully written on peer. 76962306a36Sopenharmony_ci * During resync, even in protocol != C, 77062306a36Sopenharmony_ci * we requested an explicit write ack anyways. 77162306a36Sopenharmony_ci * Which means we cannot even assert anything here. 77262306a36Sopenharmony_ci * Nothing more to do here. 77362306a36Sopenharmony_ci * We want to keep the tl in place for all protocols, to cater 77462306a36Sopenharmony_ci * for volatile write-back caches on lower level devices. */ 77562306a36Sopenharmony_ci goto ack_common; 77662306a36Sopenharmony_ci case RECV_ACKED_BY_PEER: 77762306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); 77862306a36Sopenharmony_ci /* protocol B; pretends to be successfully written on peer. 77962306a36Sopenharmony_ci * see also notes above in HANDED_OVER_TO_NETWORK about 78062306a36Sopenharmony_ci * protocol != C */ 78162306a36Sopenharmony_ci ack_common: 78262306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); 78362306a36Sopenharmony_ci break; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci case POSTPONE_WRITE: 78662306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); 78762306a36Sopenharmony_ci /* If this node has already detected the write conflict, the 78862306a36Sopenharmony_ci * worker will be waiting on misc_wait. Wake it up once this 78962306a36Sopenharmony_ci * request has completed locally. 79062306a36Sopenharmony_ci */ 79162306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 79262306a36Sopenharmony_ci req->rq_state |= RQ_POSTPONED; 79362306a36Sopenharmony_ci if (req->i.waiting) 79462306a36Sopenharmony_ci wake_up(&device->misc_wait); 79562306a36Sopenharmony_ci /* Do not clear RQ_NET_PENDING. This request will make further 79662306a36Sopenharmony_ci * progress via restart_conflicting_writes() or 79762306a36Sopenharmony_ci * fail_postponed_requests(). Hopefully. */ 79862306a36Sopenharmony_ci break; 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci case NEG_ACKED: 80162306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0); 80262306a36Sopenharmony_ci break; 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci case FAIL_FROZEN_DISK_IO: 80562306a36Sopenharmony_ci if (!(req->rq_state & RQ_LOCAL_COMPLETED)) 80662306a36Sopenharmony_ci break; 80762306a36Sopenharmony_ci mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0); 80862306a36Sopenharmony_ci break; 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci case RESTART_FROZEN_DISK_IO: 81162306a36Sopenharmony_ci if (!(req->rq_state & RQ_LOCAL_COMPLETED)) 81262306a36Sopenharmony_ci break; 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci mod_rq_state(req, m, 81562306a36Sopenharmony_ci RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED, 81662306a36Sopenharmony_ci RQ_LOCAL_PENDING); 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci rv = MR_READ; 81962306a36Sopenharmony_ci if (bio_data_dir(req->master_bio) == WRITE) 82062306a36Sopenharmony_ci rv = MR_WRITE; 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci get_ldev(device); /* always succeeds in this call path */ 82362306a36Sopenharmony_ci req->w.cb = w_restart_disk_io; 82462306a36Sopenharmony_ci drbd_queue_work(&connection->sender_work, 82562306a36Sopenharmony_ci &req->w); 82662306a36Sopenharmony_ci break; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci case RESEND: 82962306a36Sopenharmony_ci /* Simply complete (local only) READs. */ 83062306a36Sopenharmony_ci if (!(req->rq_state & RQ_WRITE) && !req->w.cb) { 83162306a36Sopenharmony_ci mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0); 83262306a36Sopenharmony_ci break; 83362306a36Sopenharmony_ci } 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK 83662306a36Sopenharmony_ci before the connection loss (B&C only); only P_BARRIER_ACK 83762306a36Sopenharmony_ci (or the local completion?) was missing when we suspended. 83862306a36Sopenharmony_ci Throwing them out of the TL here by pretending we got a BARRIER_ACK. 83962306a36Sopenharmony_ci During connection handshake, we ensure that the peer was not rebooted. */ 84062306a36Sopenharmony_ci if (!(req->rq_state & RQ_NET_OK)) { 84162306a36Sopenharmony_ci /* FIXME could this possibly be a req->dw.cb == w_send_out_of_sync? 84262306a36Sopenharmony_ci * in that case we must not set RQ_NET_PENDING. */ 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); 84562306a36Sopenharmony_ci if (req->w.cb) { 84662306a36Sopenharmony_ci /* w.cb expected to be w_send_dblock, or w_send_read_req */ 84762306a36Sopenharmony_ci drbd_queue_work(&connection->sender_work, 84862306a36Sopenharmony_ci &req->w); 84962306a36Sopenharmony_ci rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; 85062306a36Sopenharmony_ci } /* else: FIXME can this happen? */ 85162306a36Sopenharmony_ci break; 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci fallthrough; /* to BARRIER_ACKED */ 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci case BARRIER_ACKED: 85662306a36Sopenharmony_ci /* barrier ack for READ requests does not make sense */ 85762306a36Sopenharmony_ci if (!(req->rq_state & RQ_WRITE)) 85862306a36Sopenharmony_ci break; 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (req->rq_state & RQ_NET_PENDING) { 86162306a36Sopenharmony_ci /* barrier came in before all requests were acked. 86262306a36Sopenharmony_ci * this is bad, because if the connection is lost now, 86362306a36Sopenharmony_ci * we won't be able to clean them up... */ 86462306a36Sopenharmony_ci drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n"); 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci /* Allowed to complete requests, even while suspended. 86762306a36Sopenharmony_ci * As this is called for all requests within a matching epoch, 86862306a36Sopenharmony_ci * we need to filter, and only set RQ_NET_DONE for those that 86962306a36Sopenharmony_ci * have actually been on the wire. */ 87062306a36Sopenharmony_ci mod_rq_state(req, m, RQ_COMPLETION_SUSP, 87162306a36Sopenharmony_ci (req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0); 87262306a36Sopenharmony_ci break; 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci case DATA_RECEIVED: 87562306a36Sopenharmony_ci D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 87662306a36Sopenharmony_ci mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE); 87762306a36Sopenharmony_ci break; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci case QUEUE_AS_DRBD_BARRIER: 88062306a36Sopenharmony_ci start_new_tl_epoch(connection); 88162306a36Sopenharmony_ci mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); 88262306a36Sopenharmony_ci break; 88362306a36Sopenharmony_ci } 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci return rv; 88662306a36Sopenharmony_ci} 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci/* we may do a local read if: 88962306a36Sopenharmony_ci * - we are consistent (of course), 89062306a36Sopenharmony_ci * - or we are generally inconsistent, 89162306a36Sopenharmony_ci * BUT we are still/already IN SYNC for this area. 89262306a36Sopenharmony_ci * since size may be bigger than BM_BLOCK_SIZE, 89362306a36Sopenharmony_ci * we may need to check several bits. 89462306a36Sopenharmony_ci */ 89562306a36Sopenharmony_cistatic bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size) 89662306a36Sopenharmony_ci{ 89762306a36Sopenharmony_ci unsigned long sbnr, ebnr; 89862306a36Sopenharmony_ci sector_t esector, nr_sectors; 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci if (device->state.disk == D_UP_TO_DATE) 90162306a36Sopenharmony_ci return true; 90262306a36Sopenharmony_ci if (device->state.disk != D_INCONSISTENT) 90362306a36Sopenharmony_ci return false; 90462306a36Sopenharmony_ci esector = sector + (size >> 9) - 1; 90562306a36Sopenharmony_ci nr_sectors = get_capacity(device->vdisk); 90662306a36Sopenharmony_ci D_ASSERT(device, sector < nr_sectors); 90762306a36Sopenharmony_ci D_ASSERT(device, esector < nr_sectors); 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci sbnr = BM_SECT_TO_BIT(sector); 91062306a36Sopenharmony_ci ebnr = BM_SECT_TO_BIT(esector); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci return drbd_bm_count_bits(device, sbnr, ebnr) == 0; 91362306a36Sopenharmony_ci} 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_cistatic bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector, 91662306a36Sopenharmony_ci enum drbd_read_balancing rbm) 91762306a36Sopenharmony_ci{ 91862306a36Sopenharmony_ci int stripe_shift; 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci switch (rbm) { 92162306a36Sopenharmony_ci case RB_CONGESTED_REMOTE: 92262306a36Sopenharmony_ci return false; 92362306a36Sopenharmony_ci case RB_LEAST_PENDING: 92462306a36Sopenharmony_ci return atomic_read(&device->local_cnt) > 92562306a36Sopenharmony_ci atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt); 92662306a36Sopenharmony_ci case RB_32K_STRIPING: /* stripe_shift = 15 */ 92762306a36Sopenharmony_ci case RB_64K_STRIPING: 92862306a36Sopenharmony_ci case RB_128K_STRIPING: 92962306a36Sopenharmony_ci case RB_256K_STRIPING: 93062306a36Sopenharmony_ci case RB_512K_STRIPING: 93162306a36Sopenharmony_ci case RB_1M_STRIPING: /* stripe_shift = 20 */ 93262306a36Sopenharmony_ci stripe_shift = (rbm - RB_32K_STRIPING + 15); 93362306a36Sopenharmony_ci return (sector >> (stripe_shift - 9)) & 1; 93462306a36Sopenharmony_ci case RB_ROUND_ROBIN: 93562306a36Sopenharmony_ci return test_and_change_bit(READ_BALANCE_RR, &device->flags); 93662306a36Sopenharmony_ci case RB_PREFER_REMOTE: 93762306a36Sopenharmony_ci return true; 93862306a36Sopenharmony_ci case RB_PREFER_LOCAL: 93962306a36Sopenharmony_ci default: 94062306a36Sopenharmony_ci return false; 94162306a36Sopenharmony_ci } 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci/* 94562306a36Sopenharmony_ci * complete_conflicting_writes - wait for any conflicting write requests 94662306a36Sopenharmony_ci * 94762306a36Sopenharmony_ci * The write_requests tree contains all active write requests which we 94862306a36Sopenharmony_ci * currently know about. Wait for any requests to complete which conflict with 94962306a36Sopenharmony_ci * the new one. 95062306a36Sopenharmony_ci * 95162306a36Sopenharmony_ci * Only way out: remove the conflicting intervals from the tree. 95262306a36Sopenharmony_ci */ 95362306a36Sopenharmony_cistatic void complete_conflicting_writes(struct drbd_request *req) 95462306a36Sopenharmony_ci{ 95562306a36Sopenharmony_ci DEFINE_WAIT(wait); 95662306a36Sopenharmony_ci struct drbd_device *device = req->device; 95762306a36Sopenharmony_ci struct drbd_interval *i; 95862306a36Sopenharmony_ci sector_t sector = req->i.sector; 95962306a36Sopenharmony_ci int size = req->i.size; 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci for (;;) { 96262306a36Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 96362306a36Sopenharmony_ci /* Ignore, if already completed to upper layers. */ 96462306a36Sopenharmony_ci if (i->completed) 96562306a36Sopenharmony_ci continue; 96662306a36Sopenharmony_ci /* Handle the first found overlap. After the schedule 96762306a36Sopenharmony_ci * we have to restart the tree walk. */ 96862306a36Sopenharmony_ci break; 96962306a36Sopenharmony_ci } 97062306a36Sopenharmony_ci if (!i) /* if any */ 97162306a36Sopenharmony_ci break; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci /* Indicate to wake up device->misc_wait on progress. */ 97462306a36Sopenharmony_ci prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); 97562306a36Sopenharmony_ci i->waiting = true; 97662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 97762306a36Sopenharmony_ci schedule(); 97862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 97962306a36Sopenharmony_ci } 98062306a36Sopenharmony_ci finish_wait(&device->misc_wait, &wait); 98162306a36Sopenharmony_ci} 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci/* called within req_lock */ 98462306a36Sopenharmony_cistatic void maybe_pull_ahead(struct drbd_device *device) 98562306a36Sopenharmony_ci{ 98662306a36Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 98762306a36Sopenharmony_ci struct net_conf *nc; 98862306a36Sopenharmony_ci bool congested = false; 98962306a36Sopenharmony_ci enum drbd_on_congestion on_congestion; 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci rcu_read_lock(); 99262306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 99362306a36Sopenharmony_ci on_congestion = nc ? nc->on_congestion : OC_BLOCK; 99462306a36Sopenharmony_ci rcu_read_unlock(); 99562306a36Sopenharmony_ci if (on_congestion == OC_BLOCK || 99662306a36Sopenharmony_ci connection->agreed_pro_version < 96) 99762306a36Sopenharmony_ci return; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD) 100062306a36Sopenharmony_ci return; /* nothing to do ... */ 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci /* If I don't even have good local storage, we can not reasonably try 100362306a36Sopenharmony_ci * to pull ahead of the peer. We also need the local reference to make 100462306a36Sopenharmony_ci * sure device->act_log is there. 100562306a36Sopenharmony_ci */ 100662306a36Sopenharmony_ci if (!get_ldev_if_state(device, D_UP_TO_DATE)) 100762306a36Sopenharmony_ci return; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci if (nc->cong_fill && 101062306a36Sopenharmony_ci atomic_read(&device->ap_in_flight) >= nc->cong_fill) { 101162306a36Sopenharmony_ci drbd_info(device, "Congestion-fill threshold reached\n"); 101262306a36Sopenharmony_ci congested = true; 101362306a36Sopenharmony_ci } 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci if (device->act_log->used >= nc->cong_extents) { 101662306a36Sopenharmony_ci drbd_info(device, "Congestion-extents threshold reached\n"); 101762306a36Sopenharmony_ci congested = true; 101862306a36Sopenharmony_ci } 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci if (congested) { 102162306a36Sopenharmony_ci /* start a new epoch for non-mirrored writes */ 102262306a36Sopenharmony_ci start_new_tl_epoch(first_peer_device(device)->connection); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci if (on_congestion == OC_PULL_AHEAD) 102562306a36Sopenharmony_ci _drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL); 102662306a36Sopenharmony_ci else /*nc->on_congestion == OC_DISCONNECT */ 102762306a36Sopenharmony_ci _drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL); 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci put_ldev(device); 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci/* If this returns false, and req->private_bio is still set, 103362306a36Sopenharmony_ci * this should be submitted locally. 103462306a36Sopenharmony_ci * 103562306a36Sopenharmony_ci * If it returns false, but req->private_bio is not set, 103662306a36Sopenharmony_ci * we do not have access to good data :( 103762306a36Sopenharmony_ci * 103862306a36Sopenharmony_ci * Otherwise, this destroys req->private_bio, if any, 103962306a36Sopenharmony_ci * and returns true. 104062306a36Sopenharmony_ci */ 104162306a36Sopenharmony_cistatic bool do_remote_read(struct drbd_request *req) 104262306a36Sopenharmony_ci{ 104362306a36Sopenharmony_ci struct drbd_device *device = req->device; 104462306a36Sopenharmony_ci enum drbd_read_balancing rbm; 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci if (req->private_bio) { 104762306a36Sopenharmony_ci if (!drbd_may_do_local_read(device, 104862306a36Sopenharmony_ci req->i.sector, req->i.size)) { 104962306a36Sopenharmony_ci bio_put(req->private_bio); 105062306a36Sopenharmony_ci req->private_bio = NULL; 105162306a36Sopenharmony_ci put_ldev(device); 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci } 105462306a36Sopenharmony_ci 105562306a36Sopenharmony_ci if (device->state.pdsk != D_UP_TO_DATE) 105662306a36Sopenharmony_ci return false; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci if (req->private_bio == NULL) 105962306a36Sopenharmony_ci return true; 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci /* TODO: improve read balancing decisions, take into account drbd 106262306a36Sopenharmony_ci * protocol, pending requests etc. */ 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci rcu_read_lock(); 106562306a36Sopenharmony_ci rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing; 106662306a36Sopenharmony_ci rcu_read_unlock(); 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci if (rbm == RB_PREFER_LOCAL && req->private_bio) 106962306a36Sopenharmony_ci return false; /* submit locally */ 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci if (remote_due_to_read_balancing(device, req->i.sector, rbm)) { 107262306a36Sopenharmony_ci if (req->private_bio) { 107362306a36Sopenharmony_ci bio_put(req->private_bio); 107462306a36Sopenharmony_ci req->private_bio = NULL; 107562306a36Sopenharmony_ci put_ldev(device); 107662306a36Sopenharmony_ci } 107762306a36Sopenharmony_ci return true; 107862306a36Sopenharmony_ci } 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci return false; 108162306a36Sopenharmony_ci} 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_cibool drbd_should_do_remote(union drbd_dev_state s) 108462306a36Sopenharmony_ci{ 108562306a36Sopenharmony_ci return s.pdsk == D_UP_TO_DATE || 108662306a36Sopenharmony_ci (s.pdsk >= D_INCONSISTENT && 108762306a36Sopenharmony_ci s.conn >= C_WF_BITMAP_T && 108862306a36Sopenharmony_ci s.conn < C_AHEAD); 108962306a36Sopenharmony_ci /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. 109062306a36Sopenharmony_ci That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* 109162306a36Sopenharmony_ci states. */ 109262306a36Sopenharmony_ci} 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_cistatic bool drbd_should_send_out_of_sync(union drbd_dev_state s) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; 109762306a36Sopenharmony_ci /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary 109862306a36Sopenharmony_ci since we enter state C_AHEAD only if proto >= 96 */ 109962306a36Sopenharmony_ci} 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci/* returns number of connections (== 1, for drbd 8.4) 110262306a36Sopenharmony_ci * expected to actually write this data, 110362306a36Sopenharmony_ci * which does NOT include those that we are L_AHEAD for. */ 110462306a36Sopenharmony_cistatic int drbd_process_write_request(struct drbd_request *req) 110562306a36Sopenharmony_ci{ 110662306a36Sopenharmony_ci struct drbd_device *device = req->device; 110762306a36Sopenharmony_ci struct drbd_peer_device *peer_device = first_peer_device(device); 110862306a36Sopenharmony_ci int remote, send_oos; 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci remote = drbd_should_do_remote(device->state); 111162306a36Sopenharmony_ci send_oos = drbd_should_send_out_of_sync(device->state); 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci /* Need to replicate writes. Unless it is an empty flush, 111462306a36Sopenharmony_ci * which is better mapped to a DRBD P_BARRIER packet, 111562306a36Sopenharmony_ci * also for drbd wire protocol compatibility reasons. 111662306a36Sopenharmony_ci * If this was a flush, just start a new epoch. 111762306a36Sopenharmony_ci * Unless the current epoch was empty anyways, or we are not currently 111862306a36Sopenharmony_ci * replicating, in which case there is no point. */ 111962306a36Sopenharmony_ci if (unlikely(req->i.size == 0)) { 112062306a36Sopenharmony_ci /* The only size==0 bios we expect are empty flushes. */ 112162306a36Sopenharmony_ci D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH); 112262306a36Sopenharmony_ci if (remote) 112362306a36Sopenharmony_ci _req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device); 112462306a36Sopenharmony_ci return remote; 112562306a36Sopenharmony_ci } 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci if (!remote && !send_oos) 112862306a36Sopenharmony_ci return 0; 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci D_ASSERT(device, !(remote && send_oos)); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci if (remote) { 113362306a36Sopenharmony_ci _req_mod(req, TO_BE_SENT, peer_device); 113462306a36Sopenharmony_ci _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device); 113562306a36Sopenharmony_ci } else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size)) 113662306a36Sopenharmony_ci _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device); 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci return remote; 113962306a36Sopenharmony_ci} 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_cistatic void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags) 114262306a36Sopenharmony_ci{ 114362306a36Sopenharmony_ci int err = drbd_issue_discard_or_zero_out(req->device, 114462306a36Sopenharmony_ci req->i.sector, req->i.size >> 9, flags); 114562306a36Sopenharmony_ci if (err) 114662306a36Sopenharmony_ci req->private_bio->bi_status = BLK_STS_IOERR; 114762306a36Sopenharmony_ci bio_endio(req->private_bio); 114862306a36Sopenharmony_ci} 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_cistatic void 115162306a36Sopenharmony_cidrbd_submit_req_private_bio(struct drbd_request *req) 115262306a36Sopenharmony_ci{ 115362306a36Sopenharmony_ci struct drbd_device *device = req->device; 115462306a36Sopenharmony_ci struct bio *bio = req->private_bio; 115562306a36Sopenharmony_ci unsigned int type; 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci if (bio_op(bio) != REQ_OP_READ) 115862306a36Sopenharmony_ci type = DRBD_FAULT_DT_WR; 115962306a36Sopenharmony_ci else if (bio->bi_opf & REQ_RAHEAD) 116062306a36Sopenharmony_ci type = DRBD_FAULT_DT_RA; 116162306a36Sopenharmony_ci else 116262306a36Sopenharmony_ci type = DRBD_FAULT_DT_RD; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci /* State may have changed since we grabbed our reference on the 116562306a36Sopenharmony_ci * ->ldev member. Double check, and short-circuit to endio. 116662306a36Sopenharmony_ci * In case the last activity log transaction failed to get on 116762306a36Sopenharmony_ci * stable storage, and this is a WRITE, we may not even submit 116862306a36Sopenharmony_ci * this bio. */ 116962306a36Sopenharmony_ci if (get_ldev(device)) { 117062306a36Sopenharmony_ci if (drbd_insert_fault(device, type)) 117162306a36Sopenharmony_ci bio_io_error(bio); 117262306a36Sopenharmony_ci else if (bio_op(bio) == REQ_OP_WRITE_ZEROES) 117362306a36Sopenharmony_ci drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT | 117462306a36Sopenharmony_ci ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM)); 117562306a36Sopenharmony_ci else if (bio_op(bio) == REQ_OP_DISCARD) 117662306a36Sopenharmony_ci drbd_process_discard_or_zeroes_req(req, EE_TRIM); 117762306a36Sopenharmony_ci else 117862306a36Sopenharmony_ci submit_bio_noacct(bio); 117962306a36Sopenharmony_ci put_ldev(device); 118062306a36Sopenharmony_ci } else 118162306a36Sopenharmony_ci bio_io_error(bio); 118262306a36Sopenharmony_ci} 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_cistatic void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) 118562306a36Sopenharmony_ci{ 118662306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 118762306a36Sopenharmony_ci list_add_tail(&req->tl_requests, &device->submit.writes); 118862306a36Sopenharmony_ci list_add_tail(&req->req_pending_master_completion, 118962306a36Sopenharmony_ci &device->pending_master_completion[1 /* WRITE */]); 119062306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 119162306a36Sopenharmony_ci queue_work(device->submit.wq, &device->submit.worker); 119262306a36Sopenharmony_ci /* do_submit() may sleep internally on al_wait, too */ 119362306a36Sopenharmony_ci wake_up(&device->al_wait); 119462306a36Sopenharmony_ci} 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci/* returns the new drbd_request pointer, if the caller is expected to 119762306a36Sopenharmony_ci * drbd_send_and_submit() it (to save latency), or NULL if we queued the 119862306a36Sopenharmony_ci * request on the submitter thread. 119962306a36Sopenharmony_ci * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. 120062306a36Sopenharmony_ci */ 120162306a36Sopenharmony_cistatic struct drbd_request * 120262306a36Sopenharmony_cidrbd_request_prepare(struct drbd_device *device, struct bio *bio) 120362306a36Sopenharmony_ci{ 120462306a36Sopenharmony_ci const int rw = bio_data_dir(bio); 120562306a36Sopenharmony_ci struct drbd_request *req; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci /* allocate outside of all locks; */ 120862306a36Sopenharmony_ci req = drbd_req_new(device, bio); 120962306a36Sopenharmony_ci if (!req) { 121062306a36Sopenharmony_ci dec_ap_bio(device); 121162306a36Sopenharmony_ci /* only pass the error to the upper layers. 121262306a36Sopenharmony_ci * if user cannot handle io errors, that's not our business. */ 121362306a36Sopenharmony_ci drbd_err(device, "could not kmalloc() req\n"); 121462306a36Sopenharmony_ci bio->bi_status = BLK_STS_RESOURCE; 121562306a36Sopenharmony_ci bio_endio(bio); 121662306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 121762306a36Sopenharmony_ci } 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci /* Update disk stats */ 122062306a36Sopenharmony_ci req->start_jif = bio_start_io_acct(req->master_bio); 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci if (get_ldev(device)) { 122362306a36Sopenharmony_ci req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, 122462306a36Sopenharmony_ci bio, GFP_NOIO, 122562306a36Sopenharmony_ci &drbd_io_bio_set); 122662306a36Sopenharmony_ci req->private_bio->bi_private = req; 122762306a36Sopenharmony_ci req->private_bio->bi_end_io = drbd_request_endio; 122862306a36Sopenharmony_ci } 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci /* process discards always from our submitter thread */ 123162306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_WRITE_ZEROES || 123262306a36Sopenharmony_ci bio_op(bio) == REQ_OP_DISCARD) 123362306a36Sopenharmony_ci goto queue_for_submitter_thread; 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci if (rw == WRITE && req->private_bio && req->i.size 123662306a36Sopenharmony_ci && !test_bit(AL_SUSPENDED, &device->flags)) { 123762306a36Sopenharmony_ci if (!drbd_al_begin_io_fastpath(device, &req->i)) 123862306a36Sopenharmony_ci goto queue_for_submitter_thread; 123962306a36Sopenharmony_ci req->rq_state |= RQ_IN_ACT_LOG; 124062306a36Sopenharmony_ci req->in_actlog_jif = jiffies; 124162306a36Sopenharmony_ci } 124262306a36Sopenharmony_ci return req; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci queue_for_submitter_thread: 124562306a36Sopenharmony_ci atomic_inc(&device->ap_actlog_cnt); 124662306a36Sopenharmony_ci drbd_queue_write(device, req); 124762306a36Sopenharmony_ci return NULL; 124862306a36Sopenharmony_ci} 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci/* Require at least one path to current data. 125162306a36Sopenharmony_ci * We don't want to allow writes on C_STANDALONE D_INCONSISTENT: 125262306a36Sopenharmony_ci * We would not allow to read what was written, 125362306a36Sopenharmony_ci * we would not have bumped the data generation uuids, 125462306a36Sopenharmony_ci * we would cause data divergence for all the wrong reasons. 125562306a36Sopenharmony_ci * 125662306a36Sopenharmony_ci * If we don't see at least one D_UP_TO_DATE, we will fail this request, 125762306a36Sopenharmony_ci * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO, 125862306a36Sopenharmony_ci * and queues for retry later. 125962306a36Sopenharmony_ci */ 126062306a36Sopenharmony_cistatic bool may_do_writes(struct drbd_device *device) 126162306a36Sopenharmony_ci{ 126262306a36Sopenharmony_ci const union drbd_dev_state s = device->state; 126362306a36Sopenharmony_ci return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE; 126462306a36Sopenharmony_ci} 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_cistruct drbd_plug_cb { 126762306a36Sopenharmony_ci struct blk_plug_cb cb; 126862306a36Sopenharmony_ci struct drbd_request *most_recent_req; 126962306a36Sopenharmony_ci /* do we need more? */ 127062306a36Sopenharmony_ci}; 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_cistatic void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule) 127362306a36Sopenharmony_ci{ 127462306a36Sopenharmony_ci struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb); 127562306a36Sopenharmony_ci struct drbd_resource *resource = plug->cb.data; 127662306a36Sopenharmony_ci struct drbd_request *req = plug->most_recent_req; 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci kfree(cb); 127962306a36Sopenharmony_ci if (!req) 128062306a36Sopenharmony_ci return; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci spin_lock_irq(&resource->req_lock); 128362306a36Sopenharmony_ci /* In case the sender did not process it yet, raise the flag to 128462306a36Sopenharmony_ci * have it followed with P_UNPLUG_REMOTE just after. */ 128562306a36Sopenharmony_ci req->rq_state |= RQ_UNPLUG; 128662306a36Sopenharmony_ci /* but also queue a generic unplug */ 128762306a36Sopenharmony_ci drbd_queue_unplug(req->device); 128862306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 128962306a36Sopenharmony_ci spin_unlock_irq(&resource->req_lock); 129062306a36Sopenharmony_ci} 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_cistatic struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource) 129362306a36Sopenharmony_ci{ 129462306a36Sopenharmony_ci /* A lot of text to say 129562306a36Sopenharmony_ci * return (struct drbd_plug_cb*)blk_check_plugged(); */ 129662306a36Sopenharmony_ci struct drbd_plug_cb *plug; 129762306a36Sopenharmony_ci struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug)); 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci if (cb) 130062306a36Sopenharmony_ci plug = container_of(cb, struct drbd_plug_cb, cb); 130162306a36Sopenharmony_ci else 130262306a36Sopenharmony_ci plug = NULL; 130362306a36Sopenharmony_ci return plug; 130462306a36Sopenharmony_ci} 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_cistatic void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req) 130762306a36Sopenharmony_ci{ 130862306a36Sopenharmony_ci struct drbd_request *tmp = plug->most_recent_req; 130962306a36Sopenharmony_ci /* Will be sent to some peer. 131062306a36Sopenharmony_ci * Remember to tag it with UNPLUG_REMOTE on unplug */ 131162306a36Sopenharmony_ci kref_get(&req->kref); 131262306a36Sopenharmony_ci plug->most_recent_req = req; 131362306a36Sopenharmony_ci if (tmp) 131462306a36Sopenharmony_ci kref_put(&tmp->kref, drbd_req_destroy); 131562306a36Sopenharmony_ci} 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_cistatic void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) 131862306a36Sopenharmony_ci{ 131962306a36Sopenharmony_ci struct drbd_resource *resource = device->resource; 132062306a36Sopenharmony_ci struct drbd_peer_device *peer_device = first_peer_device(device); 132162306a36Sopenharmony_ci const int rw = bio_data_dir(req->master_bio); 132262306a36Sopenharmony_ci struct bio_and_error m = { NULL, }; 132362306a36Sopenharmony_ci bool no_remote = false; 132462306a36Sopenharmony_ci bool submit_private_bio = false; 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci spin_lock_irq(&resource->req_lock); 132762306a36Sopenharmony_ci if (rw == WRITE) { 132862306a36Sopenharmony_ci /* This may temporarily give up the req_lock, 132962306a36Sopenharmony_ci * but will re-aquire it before it returns here. 133062306a36Sopenharmony_ci * Needs to be before the check on drbd_suspended() */ 133162306a36Sopenharmony_ci complete_conflicting_writes(req); 133262306a36Sopenharmony_ci /* no more giving up req_lock from now on! */ 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci /* check for congestion, and potentially stop sending 133562306a36Sopenharmony_ci * full data updates, but start sending "dirty bits" only. */ 133662306a36Sopenharmony_ci maybe_pull_ahead(device); 133762306a36Sopenharmony_ci } 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci if (drbd_suspended(device)) { 134162306a36Sopenharmony_ci /* push back and retry: */ 134262306a36Sopenharmony_ci req->rq_state |= RQ_POSTPONED; 134362306a36Sopenharmony_ci if (req->private_bio) { 134462306a36Sopenharmony_ci bio_put(req->private_bio); 134562306a36Sopenharmony_ci req->private_bio = NULL; 134662306a36Sopenharmony_ci put_ldev(device); 134762306a36Sopenharmony_ci } 134862306a36Sopenharmony_ci goto out; 134962306a36Sopenharmony_ci } 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci /* We fail READ early, if we can not serve it. 135262306a36Sopenharmony_ci * We must do this before req is registered on any lists. 135362306a36Sopenharmony_ci * Otherwise, drbd_req_complete() will queue failed READ for retry. */ 135462306a36Sopenharmony_ci if (rw != WRITE) { 135562306a36Sopenharmony_ci if (!do_remote_read(req) && !req->private_bio) 135662306a36Sopenharmony_ci goto nodata; 135762306a36Sopenharmony_ci } 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci /* which transfer log epoch does this belong to? */ 136062306a36Sopenharmony_ci req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr); 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci /* no point in adding empty flushes to the transfer log, 136362306a36Sopenharmony_ci * they are mapped to drbd barriers already. */ 136462306a36Sopenharmony_ci if (likely(req->i.size!=0)) { 136562306a36Sopenharmony_ci if (rw == WRITE) 136662306a36Sopenharmony_ci first_peer_device(device)->connection->current_tle_writes++; 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log); 136962306a36Sopenharmony_ci } 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci if (rw == WRITE) { 137262306a36Sopenharmony_ci if (req->private_bio && !may_do_writes(device)) { 137362306a36Sopenharmony_ci bio_put(req->private_bio); 137462306a36Sopenharmony_ci req->private_bio = NULL; 137562306a36Sopenharmony_ci put_ldev(device); 137662306a36Sopenharmony_ci goto nodata; 137762306a36Sopenharmony_ci } 137862306a36Sopenharmony_ci if (!drbd_process_write_request(req)) 137962306a36Sopenharmony_ci no_remote = true; 138062306a36Sopenharmony_ci } else { 138162306a36Sopenharmony_ci /* We either have a private_bio, or we can read from remote. 138262306a36Sopenharmony_ci * Otherwise we had done the goto nodata above. */ 138362306a36Sopenharmony_ci if (req->private_bio == NULL) { 138462306a36Sopenharmony_ci _req_mod(req, TO_BE_SENT, peer_device); 138562306a36Sopenharmony_ci _req_mod(req, QUEUE_FOR_NET_READ, peer_device); 138662306a36Sopenharmony_ci } else 138762306a36Sopenharmony_ci no_remote = true; 138862306a36Sopenharmony_ci } 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci if (no_remote == false) { 139162306a36Sopenharmony_ci struct drbd_plug_cb *plug = drbd_check_plugged(resource); 139262306a36Sopenharmony_ci if (plug) 139362306a36Sopenharmony_ci drbd_update_plug(plug, req); 139462306a36Sopenharmony_ci } 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_ci /* If it took the fast path in drbd_request_prepare, add it here. 139762306a36Sopenharmony_ci * The slow path has added it already. */ 139862306a36Sopenharmony_ci if (list_empty(&req->req_pending_master_completion)) 139962306a36Sopenharmony_ci list_add_tail(&req->req_pending_master_completion, 140062306a36Sopenharmony_ci &device->pending_master_completion[rw == WRITE]); 140162306a36Sopenharmony_ci if (req->private_bio) { 140262306a36Sopenharmony_ci /* needs to be marked within the same spinlock */ 140362306a36Sopenharmony_ci req->pre_submit_jif = jiffies; 140462306a36Sopenharmony_ci list_add_tail(&req->req_pending_local, 140562306a36Sopenharmony_ci &device->pending_completion[rw == WRITE]); 140662306a36Sopenharmony_ci _req_mod(req, TO_BE_SUBMITTED, NULL); 140762306a36Sopenharmony_ci /* but we need to give up the spinlock to submit */ 140862306a36Sopenharmony_ci submit_private_bio = true; 140962306a36Sopenharmony_ci } else if (no_remote) { 141062306a36Sopenharmony_cinodata: 141162306a36Sopenharmony_ci if (drbd_ratelimit()) 141262306a36Sopenharmony_ci drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n", 141362306a36Sopenharmony_ci (unsigned long long)req->i.sector, req->i.size >> 9); 141462306a36Sopenharmony_ci /* A write may have been queued for send_oos, however. 141562306a36Sopenharmony_ci * So we can not simply free it, we must go through drbd_req_put_completion_ref() */ 141662306a36Sopenharmony_ci } 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ciout: 141962306a36Sopenharmony_ci drbd_req_put_completion_ref(req, &m, 1); 142062306a36Sopenharmony_ci spin_unlock_irq(&resource->req_lock); 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_ci /* Even though above is a kref_put(), this is safe. 142362306a36Sopenharmony_ci * As long as we still need to submit our private bio, 142462306a36Sopenharmony_ci * we hold a completion ref, and the request cannot disappear. 142562306a36Sopenharmony_ci * If however this request did not even have a private bio to submit 142662306a36Sopenharmony_ci * (e.g. remote read), req may already be invalid now. 142762306a36Sopenharmony_ci * That's why we cannot check on req->private_bio. */ 142862306a36Sopenharmony_ci if (submit_private_bio) 142962306a36Sopenharmony_ci drbd_submit_req_private_bio(req); 143062306a36Sopenharmony_ci if (m.bio) 143162306a36Sopenharmony_ci complete_master_bio(device, &m); 143262306a36Sopenharmony_ci} 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_civoid __drbd_make_request(struct drbd_device *device, struct bio *bio) 143562306a36Sopenharmony_ci{ 143662306a36Sopenharmony_ci struct drbd_request *req = drbd_request_prepare(device, bio); 143762306a36Sopenharmony_ci if (IS_ERR_OR_NULL(req)) 143862306a36Sopenharmony_ci return; 143962306a36Sopenharmony_ci drbd_send_and_submit(device, req); 144062306a36Sopenharmony_ci} 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_cistatic void submit_fast_path(struct drbd_device *device, struct list_head *incoming) 144362306a36Sopenharmony_ci{ 144462306a36Sopenharmony_ci struct blk_plug plug; 144562306a36Sopenharmony_ci struct drbd_request *req, *tmp; 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci blk_start_plug(&plug); 144862306a36Sopenharmony_ci list_for_each_entry_safe(req, tmp, incoming, tl_requests) { 144962306a36Sopenharmony_ci const int rw = bio_data_dir(req->master_bio); 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci if (rw == WRITE /* rw != WRITE should not even end up here! */ 145262306a36Sopenharmony_ci && req->private_bio && req->i.size 145362306a36Sopenharmony_ci && !test_bit(AL_SUSPENDED, &device->flags)) { 145462306a36Sopenharmony_ci if (!drbd_al_begin_io_fastpath(device, &req->i)) 145562306a36Sopenharmony_ci continue; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci req->rq_state |= RQ_IN_ACT_LOG; 145862306a36Sopenharmony_ci req->in_actlog_jif = jiffies; 145962306a36Sopenharmony_ci atomic_dec(&device->ap_actlog_cnt); 146062306a36Sopenharmony_ci } 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci list_del_init(&req->tl_requests); 146362306a36Sopenharmony_ci drbd_send_and_submit(device, req); 146462306a36Sopenharmony_ci } 146562306a36Sopenharmony_ci blk_finish_plug(&plug); 146662306a36Sopenharmony_ci} 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_cistatic bool prepare_al_transaction_nonblock(struct drbd_device *device, 146962306a36Sopenharmony_ci struct list_head *incoming, 147062306a36Sopenharmony_ci struct list_head *pending, 147162306a36Sopenharmony_ci struct list_head *later) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci struct drbd_request *req; 147462306a36Sopenharmony_ci int wake = 0; 147562306a36Sopenharmony_ci int err; 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 147862306a36Sopenharmony_ci while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) { 147962306a36Sopenharmony_ci err = drbd_al_begin_io_nonblock(device, &req->i); 148062306a36Sopenharmony_ci if (err == -ENOBUFS) 148162306a36Sopenharmony_ci break; 148262306a36Sopenharmony_ci if (err == -EBUSY) 148362306a36Sopenharmony_ci wake = 1; 148462306a36Sopenharmony_ci if (err) 148562306a36Sopenharmony_ci list_move_tail(&req->tl_requests, later); 148662306a36Sopenharmony_ci else 148762306a36Sopenharmony_ci list_move_tail(&req->tl_requests, pending); 148862306a36Sopenharmony_ci } 148962306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 149062306a36Sopenharmony_ci if (wake) 149162306a36Sopenharmony_ci wake_up(&device->al_wait); 149262306a36Sopenharmony_ci return !list_empty(pending); 149362306a36Sopenharmony_ci} 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_cistatic void send_and_submit_pending(struct drbd_device *device, struct list_head *pending) 149662306a36Sopenharmony_ci{ 149762306a36Sopenharmony_ci struct blk_plug plug; 149862306a36Sopenharmony_ci struct drbd_request *req; 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci blk_start_plug(&plug); 150162306a36Sopenharmony_ci while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) { 150262306a36Sopenharmony_ci req->rq_state |= RQ_IN_ACT_LOG; 150362306a36Sopenharmony_ci req->in_actlog_jif = jiffies; 150462306a36Sopenharmony_ci atomic_dec(&device->ap_actlog_cnt); 150562306a36Sopenharmony_ci list_del_init(&req->tl_requests); 150662306a36Sopenharmony_ci drbd_send_and_submit(device, req); 150762306a36Sopenharmony_ci } 150862306a36Sopenharmony_ci blk_finish_plug(&plug); 150962306a36Sopenharmony_ci} 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_civoid do_submit(struct work_struct *ws) 151262306a36Sopenharmony_ci{ 151362306a36Sopenharmony_ci struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); 151462306a36Sopenharmony_ci LIST_HEAD(incoming); /* from drbd_make_request() */ 151562306a36Sopenharmony_ci LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */ 151662306a36Sopenharmony_ci LIST_HEAD(busy); /* blocked by resync requests */ 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci /* grab new incoming requests */ 151962306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 152062306a36Sopenharmony_ci list_splice_tail_init(&device->submit.writes, &incoming); 152162306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci for (;;) { 152462306a36Sopenharmony_ci DEFINE_WAIT(wait); 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci /* move used-to-be-busy back to front of incoming */ 152762306a36Sopenharmony_ci list_splice_init(&busy, &incoming); 152862306a36Sopenharmony_ci submit_fast_path(device, &incoming); 152962306a36Sopenharmony_ci if (list_empty(&incoming)) 153062306a36Sopenharmony_ci break; 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci for (;;) { 153362306a36Sopenharmony_ci prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE); 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci list_splice_init(&busy, &incoming); 153662306a36Sopenharmony_ci prepare_al_transaction_nonblock(device, &incoming, &pending, &busy); 153762306a36Sopenharmony_ci if (!list_empty(&pending)) 153862306a36Sopenharmony_ci break; 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci schedule(); 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci /* If all currently "hot" activity log extents are kept busy by 154362306a36Sopenharmony_ci * incoming requests, we still must not totally starve new 154462306a36Sopenharmony_ci * requests to "cold" extents. 154562306a36Sopenharmony_ci * Something left on &incoming means there had not been 154662306a36Sopenharmony_ci * enough update slots available, and the activity log 154762306a36Sopenharmony_ci * has been marked as "starving". 154862306a36Sopenharmony_ci * 154962306a36Sopenharmony_ci * Try again now, without looking for new requests, 155062306a36Sopenharmony_ci * effectively blocking all new requests until we made 155162306a36Sopenharmony_ci * at least _some_ progress with what we currently have. 155262306a36Sopenharmony_ci */ 155362306a36Sopenharmony_ci if (!list_empty(&incoming)) 155462306a36Sopenharmony_ci continue; 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci /* Nothing moved to pending, but nothing left 155762306a36Sopenharmony_ci * on incoming: all moved to busy! 155862306a36Sopenharmony_ci * Grab new and iterate. */ 155962306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 156062306a36Sopenharmony_ci list_splice_tail_init(&device->submit.writes, &incoming); 156162306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 156262306a36Sopenharmony_ci } 156362306a36Sopenharmony_ci finish_wait(&device->al_wait, &wait); 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ci /* If the transaction was full, before all incoming requests 156662306a36Sopenharmony_ci * had been processed, skip ahead to commit, and iterate 156762306a36Sopenharmony_ci * without splicing in more incoming requests from upper layers. 156862306a36Sopenharmony_ci * 156962306a36Sopenharmony_ci * Else, if all incoming have been processed, 157062306a36Sopenharmony_ci * they have become either "pending" (to be submitted after 157162306a36Sopenharmony_ci * next transaction commit) or "busy" (blocked by resync). 157262306a36Sopenharmony_ci * 157362306a36Sopenharmony_ci * Maybe more was queued, while we prepared the transaction? 157462306a36Sopenharmony_ci * Try to stuff those into this transaction as well. 157562306a36Sopenharmony_ci * Be strictly non-blocking here, 157662306a36Sopenharmony_ci * we already have something to commit. 157762306a36Sopenharmony_ci * 157862306a36Sopenharmony_ci * Commit if we don't make any more progres. 157962306a36Sopenharmony_ci */ 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci while (list_empty(&incoming)) { 158262306a36Sopenharmony_ci LIST_HEAD(more_pending); 158362306a36Sopenharmony_ci LIST_HEAD(more_incoming); 158462306a36Sopenharmony_ci bool made_progress; 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_ci /* It is ok to look outside the lock, 158762306a36Sopenharmony_ci * it's only an optimization anyways */ 158862306a36Sopenharmony_ci if (list_empty(&device->submit.writes)) 158962306a36Sopenharmony_ci break; 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 159262306a36Sopenharmony_ci list_splice_tail_init(&device->submit.writes, &more_incoming); 159362306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci if (list_empty(&more_incoming)) 159662306a36Sopenharmony_ci break; 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy); 159962306a36Sopenharmony_ci 160062306a36Sopenharmony_ci list_splice_tail_init(&more_pending, &pending); 160162306a36Sopenharmony_ci list_splice_tail_init(&more_incoming, &incoming); 160262306a36Sopenharmony_ci if (!made_progress) 160362306a36Sopenharmony_ci break; 160462306a36Sopenharmony_ci } 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci drbd_al_begin_io_commit(device); 160762306a36Sopenharmony_ci send_and_submit_pending(device, &pending); 160862306a36Sopenharmony_ci } 160962306a36Sopenharmony_ci} 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_civoid drbd_submit_bio(struct bio *bio) 161262306a36Sopenharmony_ci{ 161362306a36Sopenharmony_ci struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; 161462306a36Sopenharmony_ci 161562306a36Sopenharmony_ci bio = bio_split_to_limits(bio); 161662306a36Sopenharmony_ci if (!bio) 161762306a36Sopenharmony_ci return; 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci /* 162062306a36Sopenharmony_ci * what we "blindly" assume: 162162306a36Sopenharmony_ci */ 162262306a36Sopenharmony_ci D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci inc_ap_bio(device); 162562306a36Sopenharmony_ci __drbd_make_request(device, bio); 162662306a36Sopenharmony_ci} 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_cistatic bool net_timeout_reached(struct drbd_request *net_req, 162962306a36Sopenharmony_ci struct drbd_connection *connection, 163062306a36Sopenharmony_ci unsigned long now, unsigned long ent, 163162306a36Sopenharmony_ci unsigned int ko_count, unsigned int timeout) 163262306a36Sopenharmony_ci{ 163362306a36Sopenharmony_ci struct drbd_device *device = net_req->device; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci if (!time_after(now, net_req->pre_send_jif + ent)) 163662306a36Sopenharmony_ci return false; 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) 163962306a36Sopenharmony_ci return false; 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci if (net_req->rq_state & RQ_NET_PENDING) { 164262306a36Sopenharmony_ci drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n", 164362306a36Sopenharmony_ci jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout); 164462306a36Sopenharmony_ci return true; 164562306a36Sopenharmony_ci } 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_ci /* We received an ACK already (or are using protocol A), 164862306a36Sopenharmony_ci * but are waiting for the epoch closing barrier ack. 164962306a36Sopenharmony_ci * Check if we sent the barrier already. We should not blame the peer 165062306a36Sopenharmony_ci * for being unresponsive, if we did not even ask it yet. */ 165162306a36Sopenharmony_ci if (net_req->epoch == connection->send.current_epoch_nr) { 165262306a36Sopenharmony_ci drbd_warn(device, 165362306a36Sopenharmony_ci "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n", 165462306a36Sopenharmony_ci jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout); 165562306a36Sopenharmony_ci return false; 165662306a36Sopenharmony_ci } 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci /* Worst case: we may have been blocked for whatever reason, then 165962306a36Sopenharmony_ci * suddenly are able to send a lot of requests (and epoch separating 166062306a36Sopenharmony_ci * barriers) in quick succession. 166162306a36Sopenharmony_ci * The timestamp of the net_req may be much too old and not correspond 166262306a36Sopenharmony_ci * to the sending time of the relevant unack'ed barrier packet, so 166362306a36Sopenharmony_ci * would trigger a spurious timeout. The latest barrier packet may 166462306a36Sopenharmony_ci * have a too recent timestamp to trigger the timeout, potentially miss 166562306a36Sopenharmony_ci * a timeout. Right now we don't have a place to conveniently store 166662306a36Sopenharmony_ci * these timestamps. 166762306a36Sopenharmony_ci * But in this particular situation, the application requests are still 166862306a36Sopenharmony_ci * completed to upper layers, DRBD should still "feel" responsive. 166962306a36Sopenharmony_ci * No need yet to kill this connection, it may still recover. 167062306a36Sopenharmony_ci * If not, eventually we will have queued enough into the network for 167162306a36Sopenharmony_ci * us to block. From that point of view, the timestamp of the last sent 167262306a36Sopenharmony_ci * barrier packet is relevant enough. 167362306a36Sopenharmony_ci */ 167462306a36Sopenharmony_ci if (time_after(now, connection->send.last_sent_barrier_jif + ent)) { 167562306a36Sopenharmony_ci drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n", 167662306a36Sopenharmony_ci connection->send.last_sent_barrier_jif, now, 167762306a36Sopenharmony_ci jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout); 167862306a36Sopenharmony_ci return true; 167962306a36Sopenharmony_ci } 168062306a36Sopenharmony_ci return false; 168162306a36Sopenharmony_ci} 168262306a36Sopenharmony_ci 168362306a36Sopenharmony_ci/* A request is considered timed out, if 168462306a36Sopenharmony_ci * - we have some effective timeout from the configuration, 168562306a36Sopenharmony_ci * with some state restrictions applied, 168662306a36Sopenharmony_ci * - the oldest request is waiting for a response from the network 168762306a36Sopenharmony_ci * resp. the local disk, 168862306a36Sopenharmony_ci * - the oldest request is in fact older than the effective timeout, 168962306a36Sopenharmony_ci * - the connection was established (resp. disk was attached) 169062306a36Sopenharmony_ci * for longer than the timeout already. 169162306a36Sopenharmony_ci * Note that for 32bit jiffies and very stable connections/disks, 169262306a36Sopenharmony_ci * we may have a wrap around, which is catched by 169362306a36Sopenharmony_ci * !time_in_range(now, last_..._jif, last_..._jif + timeout). 169462306a36Sopenharmony_ci * 169562306a36Sopenharmony_ci * Side effect: once per 32bit wrap-around interval, which means every 169662306a36Sopenharmony_ci * ~198 days with 250 HZ, we have a window where the timeout would need 169762306a36Sopenharmony_ci * to expire twice (worst case) to become effective. Good enough. 169862306a36Sopenharmony_ci */ 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_civoid request_timer_fn(struct timer_list *t) 170162306a36Sopenharmony_ci{ 170262306a36Sopenharmony_ci struct drbd_device *device = from_timer(device, t, request_timer); 170362306a36Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 170462306a36Sopenharmony_ci struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */ 170562306a36Sopenharmony_ci struct net_conf *nc; 170662306a36Sopenharmony_ci unsigned long oldest_submit_jif; 170762306a36Sopenharmony_ci unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ 170862306a36Sopenharmony_ci unsigned long now; 170962306a36Sopenharmony_ci unsigned int ko_count = 0, timeout = 0; 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci rcu_read_lock(); 171262306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 171362306a36Sopenharmony_ci if (nc && device->state.conn >= C_WF_REPORT_PARAMS) { 171462306a36Sopenharmony_ci ko_count = nc->ko_count; 171562306a36Sopenharmony_ci timeout = nc->timeout; 171662306a36Sopenharmony_ci } 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_ci if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */ 171962306a36Sopenharmony_ci dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10; 172062306a36Sopenharmony_ci put_ldev(device); 172162306a36Sopenharmony_ci } 172262306a36Sopenharmony_ci rcu_read_unlock(); 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ci 172562306a36Sopenharmony_ci ent = timeout * HZ/10 * ko_count; 172662306a36Sopenharmony_ci et = min_not_zero(dt, ent); 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_ci if (!et) 172962306a36Sopenharmony_ci return; /* Recurring timer stopped */ 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci now = jiffies; 173262306a36Sopenharmony_ci nt = now + et; 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 173562306a36Sopenharmony_ci req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); 173662306a36Sopenharmony_ci req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci /* maybe the oldest request waiting for the peer is in fact still 173962306a36Sopenharmony_ci * blocking in tcp sendmsg. That's ok, though, that's handled via the 174062306a36Sopenharmony_ci * socket send timeout, requesting a ping, and bumping ko-count in 174162306a36Sopenharmony_ci * we_should_drop_the_connection(). 174262306a36Sopenharmony_ci */ 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci /* check the oldest request we did successfully sent, 174562306a36Sopenharmony_ci * but which is still waiting for an ACK. */ 174662306a36Sopenharmony_ci req_peer = connection->req_ack_pending; 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci /* if we don't have such request (e.g. protocoll A) 174962306a36Sopenharmony_ci * check the oldest requests which is still waiting on its epoch 175062306a36Sopenharmony_ci * closing barrier ack. */ 175162306a36Sopenharmony_ci if (!req_peer) 175262306a36Sopenharmony_ci req_peer = connection->req_not_net_done; 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci /* evaluate the oldest peer request only in one timer! */ 175562306a36Sopenharmony_ci if (req_peer && req_peer->device != device) 175662306a36Sopenharmony_ci req_peer = NULL; 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci /* do we have something to evaluate? */ 175962306a36Sopenharmony_ci if (req_peer == NULL && req_write == NULL && req_read == NULL) 176062306a36Sopenharmony_ci goto out; 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci oldest_submit_jif = 176362306a36Sopenharmony_ci (req_write && req_read) 176462306a36Sopenharmony_ci ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif) 176562306a36Sopenharmony_ci ? req_write->pre_submit_jif : req_read->pre_submit_jif ) 176662306a36Sopenharmony_ci : req_write ? req_write->pre_submit_jif 176762306a36Sopenharmony_ci : req_read ? req_read->pre_submit_jif : now; 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout)) 177062306a36Sopenharmony_ci _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci if (dt && oldest_submit_jif != now && 177362306a36Sopenharmony_ci time_after(now, oldest_submit_jif + dt) && 177462306a36Sopenharmony_ci !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { 177562306a36Sopenharmony_ci drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); 177662306a36Sopenharmony_ci __drbd_chk_io_error(device, DRBD_FORCE_DETACH); 177762306a36Sopenharmony_ci } 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_ci /* Reschedule timer for the nearest not already expired timeout. 178062306a36Sopenharmony_ci * Fallback to now + min(effective network timeout, disk timeout). */ 178162306a36Sopenharmony_ci ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent)) 178262306a36Sopenharmony_ci ? req_peer->pre_send_jif + ent : now + et; 178362306a36Sopenharmony_ci dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt)) 178462306a36Sopenharmony_ci ? oldest_submit_jif + dt : now + et; 178562306a36Sopenharmony_ci nt = time_before(ent, dt) ? ent : dt; 178662306a36Sopenharmony_ciout: 178762306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 178862306a36Sopenharmony_ci mod_timer(&device->request_timer, nt); 178962306a36Sopenharmony_ci} 1790