// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_req.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


 */

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"


static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);

static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
{
	struct drbd_request *req;

	req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
	if (!req)
		return NULL;
	memset(req, 0, sizeof(*req));

	req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
		      | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
		      | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
	req->device = device;
	req->master_bio = bio_src;
	req->epoch = 0;

	drbd_clear_interval(&req->i);
	req->i.sector     = bio_src->bi_iter.bi_sector;
	req->i.size       = bio_src->bi_iter.bi_size;
	req->i.local = true;
	req->i.waiting = false;

	INIT_LIST_HEAD(&req->tl_requests);
	INIT_LIST_HEAD(&req->w.list);
	INIT_LIST_HEAD(&req->req_pending_master_completion);
	INIT_LIST_HEAD(&req->req_pending_local);

	/* one reference to be put by __drbd_make_request */
	atomic_set(&req->completion_ref, 1);
	/* one kref as long as completion_ref > 0 */
	kref_init(&req->kref);
	return req;
}
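
/*
 * Request lifetime in brief (an explanatory sketch, not authoritative
 * documentation): completion_ref counts the reasons the master bio may
 * not be completed yet (local I/O in flight, queued for or pending on
 * the network, completion suspended), while kref counts the reasons the
 * request object itself must stay allocated (e.g. an expected barrier
 * ack).  When completion_ref drops to zero, drbd_req_complete() signals
 * completion to the upper layers; the final kref_put() then calls
 * drbd_req_destroy().
 */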

static void drbd_remove_request_interval(struct rb_root *root,
					 struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	struct drbd_interval *i = &req->i;

	drbd_remove_interval(root, i);

	/* Wake up any processes waiting for this request to complete.  */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

void drbd_req_destroy(struct kref *kref)
{
	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
	struct drbd_device *device = req->device;
	const unsigned s = req->rq_state;

	if ((req->master_bio && !(s & RQ_POSTPONED)) ||
		atomic_read(&req->completion_ref) ||
		(s & RQ_LOCAL_PENDING) ||
		((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
		drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
				s, atomic_read(&req->completion_ref));
		return;
	}

	/* If called from mod_rq_state (expected normal case) or
	 * drbd_send_and_submit (the less likely normal path), this holds the
	 * req_lock, and req->tl_requests will typically be on ->transfer_log,
	 * though it may still be empty (never added to the transfer log).
	 *
	 * If called from do_retry(), we do NOT hold the req_lock, but we are
	 * still allowed to unconditionally list_del(&req->tl_requests),
	 * because it will be on a local on-stack list only. */
	list_del_init(&req->tl_requests);

	/* finally remove the request from the interval tree used for
	 * conflict detection and block_id verification. */
	if (!drbd_interval_empty(&req->i)) {
		struct rb_root *root;

		if (s & RQ_WRITE)
			root = &device->write_requests;
		else
			root = &device->read_requests;
		drbd_remove_request_interval(root, req);
	} else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
		drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
			s, (unsigned long long)req->i.sector, req->i.size);

	/* if it was a write, we may have to set the corresponding
	 * bit(s) out-of-sync first. If it had a local part, we need to
	 * release the reference to the activity log. */
	if (s & RQ_WRITE) {
		/* Set out-of-sync unless both OK flags are set
		 * (local only or remote failed).
		 * Other places where we set out-of-sync:
		 * READ with local io-error */

		/* There is a special case:
		 * we may notice late that IO was suspended,
		 * and postpone, or schedule for retry, a write,
		 * before it even was submitted or sent.
		 * In that case we do not want to touch the bitmap at all.
		 */
		struct drbd_peer_device *peer_device = first_peer_device(device);
		if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
			if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
				drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);

			if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
				drbd_set_in_sync(peer_device, req->i.sector, req->i.size);
		}

		/* one might be tempted to move the drbd_al_complete_io
		 * to the local io completion callback drbd_request_endio.
		 * but, if this was a mirror write, we may only
		 * drbd_al_complete_io after this is RQ_NET_DONE,
		 * otherwise the extent could be dropped from the al
		 * before it has actually been written on the peer.
		 * if we crash before our peer knows about the request,
		 * but after the extent has been dropped from the al,
		 * we would forget to resync the corresponding extent.
		 */
		if (s & RQ_IN_ACT_LOG) {
			if (get_ldev_if_state(device, D_FAILED)) {
				drbd_al_complete_io(device, &req->i);
				put_ldev(device);
			} else if (drbd_ratelimit()) {
				drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
					 "but my Disk seems to have failed :(\n",
					 (unsigned long long) req->i.sector, req->i.size);
			}
		}
	}

	mempool_free(req, &drbd_request_mempool);
}

static void wake_all_senders(struct drbd_connection *connection)
{
	wake_up(&connection->sender_work.q_wait);
}

/* must hold resource->req_lock */
void start_new_tl_epoch(struct drbd_connection *connection)
{
	/* no point closing an epoch, if it is empty, anyways. */
	if (connection->current_tle_writes == 0)
		return;

	connection->current_tle_writes = 0;
	atomic_inc(&connection->current_tle_nr);
	wake_all_senders(connection);
}

void complete_master_bio(struct drbd_device *device,
		struct bio_and_error *m)
{
	if (unlikely(m->error))
		m->bio->bi_status = errno_to_blk_status(m->error);
	bio_endio(m->bio);
	dec_ap_bio(device);
}
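
/*
 * Note: m->error carries a negative errno (e.g. -EIO), while the bio
 * layer reports completion status as a blk_status_t; hence the
 * errno_to_blk_status() conversion above.
 */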


/* Helper for __req_mod().
 * Set m->bio to the master bio, if it is fit to be completed,
 * or leave it alone (it is initialized to NULL in __req_mod),
 * if it has already been completed, or cannot be completed yet.
 * If m->bio is set, the error status to be returned is placed in m->error.
 */
static
void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
{
	const unsigned s = req->rq_state;
	struct drbd_device *device = req->device;
	int error, ok;

	/* we must not complete the master bio, while it is
	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
	 *	not yet acknowledged by the peer
	 *	not yet completed by the local io subsystem
	 * these flags may get cleared in any order by
	 *	the worker,
	 *	the receiver,
	 *	the bio_endio completion callbacks.
	 */
	if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
	    (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
	    (s & RQ_COMPLETION_SUSP)) {
		drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
		return;
	}

	if (!req->master_bio) {
		drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
		return;
	}

	/*
	 * figure out whether to report success or failure.
	 *
	 * report success when at least one of the operations succeeded.
	 * or, to put it the other way,
	 * only report failure, when both operations failed.
	 *
	 * what to do about the failures is handled elsewhere.
	 * what we need to do here is just: complete the master_bio.
	 *
	 * local completion error, if any, has been stored as ERR_PTR
	 * in private_bio within drbd_request_endio.
	 */
	ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
	error = PTR_ERR(req->private_bio);

	/* Before we can signal completion to the upper layers,
	 * we may need to close the current transfer log epoch.
	 * We are within the request lock, so we can simply compare
	 * the request epoch number with the current transfer log
	 * epoch number.  If they match, increase the current_tle_nr,
	 * and reset the transfer log epoch write_cnt.
	 */
	if (op_is_write(bio_op(req->master_bio)) &&
	    req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr))
		start_new_tl_epoch(first_peer_device(device)->connection);

	/* Update disk stats */
	bio_end_io_acct(req->master_bio, req->start_jif);

	/* If READ failed,
	 * have it be pushed back to the retry work queue,
	 * so it will re-enter __drbd_make_request(),
	 * and be re-assigned to a suitable local or remote path,
	 * or failed if we do not have access to good data anymore.
	 *
	 * Unless it was failed early by __drbd_make_request(),
	 * because no path was available, in which case
	 * it was not even added to the transfer_log.
	 *
	 * read-ahead may fail, and will not be retried.
	 *
	 * WRITE should have used all available paths already.
	 */
	if (!ok &&
	    bio_op(req->master_bio) == REQ_OP_READ &&
	    !(req->master_bio->bi_opf & REQ_RAHEAD) &&
	    !list_empty(&req->tl_requests))
		req->rq_state |= RQ_POSTPONED;

	if (!(req->rq_state & RQ_POSTPONED)) {
		m->error = ok ? 0 : (error ?: -EIO);
		m->bio = req->master_bio;
		req->master_bio = NULL;
		/* We leave it in the tree, to be able to verify later
		 * write-acks in protocol != C during resync.
		 * But we mark it as "complete", so it won't be counted as
		 * conflict in a multi-primary setup. */
		req->i.completed = true;
	}

	if (req->i.waiting)
		wake_up(&device->misc_wait);

	/* Either we are about to complete to upper layers,
	 * or we will restart this request.
	 * In either case, the request object will be destroyed soon,
	 * so better remove it from all lists. */
	list_del_init(&req->req_pending_master_completion);
}

/* still holds resource->req_lock */
static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
{
	struct drbd_device *device = req->device;
	D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));

	if (!put)
		return;

	if (!atomic_sub_and_test(put, &req->completion_ref))
		return;

	drbd_req_complete(req, m);

	/* local completion may still come in later,
	 * we need to keep the req object around. */
	if (req->rq_state & RQ_LOCAL_ABORTED)
		return;

	if (req->rq_state & RQ_POSTPONED) {
		/* don't destroy the req object just yet,
		 * but queue it for retry */
		drbd_restart_request(req);
		return;
	}

	kref_put(&req->kref, drbd_req_destroy);
}

static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_next == NULL)
		connection->req_next = req;
}

static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	struct drbd_request *iter = req;
	if (!connection)
		return;
	if (connection->req_next != req)
		return;

	req = NULL;
	list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
		const unsigned int s = iter->rq_state;

		if (s & RQ_NET_QUEUED) {
			req = iter;
			break;
		}
	}
	connection->req_next = req;
}

static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_ack_pending == NULL)
		connection->req_ack_pending = req;
}

static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	struct drbd_request *iter = req;
	if (!connection)
		return;
	if (connection->req_ack_pending != req)
		return;

	req = NULL;
	list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
		const unsigned int s = iter->rq_state;

		if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) {
			req = iter;
			break;
		}
	}
	connection->req_ack_pending = req;
}

static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_not_net_done == NULL)
		connection->req_not_net_done = req;
}

static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	struct drbd_request *iter = req;
	if (!connection)
		return;
	if (connection->req_not_net_done != req)
		return;

	req = NULL;
	list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
		const unsigned int s = iter->rq_state;

		if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) {
			req = iter;
			break;
		}
	}
	connection->req_not_net_done = req;
}
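
/*
 * A note on the set_if_null_* / advance_conn_* pairs above (a sketch of
 * the invariant, as understood from the code): connection->req_next,
 * ->req_ack_pending and ->req_not_net_done each cache the oldest
 * request on the transfer log that is still in the respective state,
 * so the sender and the request timeout logic need not rescan the
 * whole transfer log.  Once the cached request leaves that state, the
 * advance_* helper walks forward from it to the next still-matching
 * request, or NULL if there is none.
 */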

/* I'd like this to be the only place that manipulates
 * req->completion_ref and req->kref. */
static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
		int clear, int set)
{
	struct drbd_device *device = req->device;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	unsigned s = req->rq_state;
	int c_put = 0;

	if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP))
		set |= RQ_COMPLETION_SUSP;

	/* apply */

	req->rq_state &= ~clear;
	req->rq_state |= set;

	/* no change? */
	if (req->rq_state == s)
		return;

	/* intent: get references */

	kref_get(&req->kref);

	if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
		atomic_inc(&req->completion_ref);

	if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
		inc_ap_pending(device);
		atomic_inc(&req->completion_ref);
	}

	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
		atomic_inc(&req->completion_ref);
		set_if_null_req_next(peer_device, req);
	}

	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
		kref_get(&req->kref); /* wait for the DONE */

	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
		/* potentially already completed in the ack_receiver thread */
		if (!(s & RQ_NET_DONE)) {
			atomic_add(req->i.size >> 9, &device->ap_in_flight);
			set_if_null_req_not_net_done(peer_device, req);
		}
		if (req->rq_state & RQ_NET_PENDING)
			set_if_null_req_ack_pending(peer_device, req);
	}

	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
		atomic_inc(&req->completion_ref);

	/* progress: put references */

	if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
		++c_put;

	if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
		D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
		++c_put;
	}

	if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
		if (req->rq_state & RQ_LOCAL_ABORTED)
			kref_put(&req->kref, drbd_req_destroy);
		else
			++c_put;
		list_del_init(&req->req_pending_local);
	}

	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
		dec_ap_pending(device);
		++c_put;
		req->acked_jif = jiffies;
		advance_conn_req_ack_pending(peer_device, req);
	}

	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
		++c_put;
		advance_conn_req_next(peer_device, req);
	}

	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
		if (s & RQ_NET_SENT)
			atomic_sub(req->i.size >> 9, &device->ap_in_flight);
		if (s & RQ_EXP_BARR_ACK)
			kref_put(&req->kref, drbd_req_destroy);
		req->net_done_jif = jiffies;

		/* in ahead/behind mode, or just in case,
		 * before we finally destroy this request,
		 * the caching pointers must not reference it anymore */
		advance_conn_req_next(peer_device, req);
		advance_conn_req_ack_pending(peer_device, req);
		advance_conn_req_not_net_done(peer_device, req);
	}

	/* potentially complete and destroy */

	/* If we made progress, retry conflicting peer requests, if any. */
	if (req->i.waiting)
		wake_up(&device->misc_wait);

	drbd_req_put_completion_ref(req, m, c_put);
	kref_put(&req->kref, drbd_req_destroy);
}

static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
{
	if (!drbd_ratelimit())
		return;

	drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n",
			(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
			(unsigned long long)req->i.sector,
			req->i.size >> 9,
			device->ldev->backing_bdev);
}

/* Helper for HANDED_OVER_TO_NETWORK.
 * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
 * Is it also still "PENDING"?
 * --> If so, clear PENDING and set NET_OK below.
 * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
 * (and we must not set RQ_NET_OK) */
static inline bool is_pending_write_protocol_A(struct drbd_request *req)
{
	return (req->rq_state &
		   (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
		== (RQ_WRITE|RQ_NET_PENDING);
}
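
/*
 * For orientation, the ack a write expects per wire protocol (this
 * merely summarizes the TO_BE_SENT handling in __req_mod() below):
 *
 *	protocol A:  no ack expected      -> neither flag set
 *	protocol B:  P_RECV_ACK expected  -> RQ_EXP_RECEIVE_ACK
 *	protocol C:  P_WRITE_ACK expected -> RQ_EXP_WRITE_ACK
 *
 * which is why a still-pending protocol A write can be marked RQ_NET_OK
 * as soon as it has been handed over to the network.
 */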

/* obviously this could be coded as many single functions
 * instead of one huge switch,
 * or by putting the code directly in the respective locations
 * (as it has been before).
 *
 * but having it this way
 *  enforces that it is all in this one place, where it is easier to audit,
 *  it makes it obvious that whatever "event" "happens" to a request should
 *  happen "atomically" within the req_lock,
 *  and it enforces that we have to think in a very structured manner
 *  about the "events" that may happen to a request during its life time ...
 *
 *
 * peer_device == NULL means local disk
 */
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct drbd_peer_device *peer_device,
		struct bio_and_error *m)
{
	struct drbd_device *const device = req->device;
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	struct net_conf *nc;
	int p, rv = 0;

	if (m)
		m->bio = NULL;

	switch (what) {
	default:
		drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__, __LINE__);
		break;

	/* does not happen...
	 * initialization done in drbd_req_new
	case CREATED:
		break;
		*/

	case TO_BE_SENT: /* via network */
		/* reached via __drbd_make_request
		 * and from w_read_retry_remote */
		D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		p = nc->wire_protocol;
		rcu_read_unlock();
		req->rq_state |=
			p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
			p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
		mod_rq_state(req, m, 0, RQ_NET_PENDING);
		break;

	case TO_BE_SUBMITTED: /* locally */
		/* reached via __drbd_make_request */
		D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK));
		mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
		break;

	case COMPLETED_OK:
		if (req->rq_state & RQ_WRITE)
			device->writ_cnt += req->i.size >> 9;
		else
			device->read_cnt += req->i.size >> 9;

		mod_rq_state(req, m, RQ_LOCAL_PENDING,
				RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
		break;

	case ABORT_DISK_IO:
		mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
		break;

	case WRITE_COMPLETED_WITH_ERROR:
		drbd_report_io_error(device, req);
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case READ_COMPLETED_WITH_ERROR:
		drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
		drbd_report_io_error(device, req);
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
		fallthrough;
	case READ_AHEAD_COMPLETED_WITH_ERROR:
		/* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case DISCARD_COMPLETED_NOTSUPP:
	case DISCARD_COMPLETED_WITH_ERROR:
		/* I'd rather not detach from local disk just because it
		 * failed a REQ_OP_DISCARD. */
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case QUEUE_FOR_NET_READ:
		/* READ, and
		 * no local disk,
		 * or target area marked as invalid,
		 * or just got an io-error. */
		/* from __drbd_make_request
		 * or from bio_endio during read io-error recovery */

		/* So we can verify the handle in the answer packet.
		 * Corresponding drbd_remove_request_interval is in
		 * drbd_req_complete() */
		D_ASSERT(device, drbd_interval_empty(&req->i));
		drbd_insert_interval(&device->read_requests, &req->i);

		set_bit(UNPLUG_REMOTE, &device->flags);

		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
		req->w.cb = w_send_read_req;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case QUEUE_FOR_NET_WRITE:
		/* assert something? */
		/* from __drbd_make_request only */

		/* Corresponding drbd_remove_request_interval is in
		 * drbd_req_complete() */
		D_ASSERT(device, drbd_interval_empty(&req->i));
		drbd_insert_interval(&device->write_requests, &req->i);

		/* NOTE
		 * In case the req ended up on the transfer log before being
		 * queued on the worker, it could lead to this request being
		 * missed during cleanup after connection loss.
		 * So we have to do both operations here,
		 * within the same lock that protects the transfer log.
		 *
		 * _req_add_to_epoch(req); this has to be after the
		 * _maybe_start_new_epoch(req); which happened in
		 * __drbd_make_request, because we now may set the bit
		 * again ourselves to close the current epoch.
		 *
		 * Add req to the (now) current epoch (barrier). */

		/* otherwise we may lose an unplug, which may cause some remote
		 * io-scheduler timeout to expire, increasing maximum latency,
		 * hurting performance. */
		set_bit(UNPLUG_REMOTE, &device->flags);

		/* queue work item to send data */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
		req->w.cb = w_send_dblock;
		drbd_queue_work(&connection->sender_work,
				&req->w);

		/* close the epoch, in case it outgrew the limit */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		p = nc->max_epoch_size;
		rcu_read_unlock();
		if (connection->current_tle_writes >= p)
			start_new_tl_epoch(connection);

		break;

	case QUEUE_FOR_SEND_OOS:
		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
		req->w.cb = w_send_out_of_sync;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case READ_RETRY_REMOTE_CANCELED:
	case SEND_CANCELED:
	case SEND_FAILED:
		/* real cleanup will be done from tl_clear.  just update flags
		 * so it is no longer marked as on the worker queue */
		mod_rq_state(req, m, RQ_NET_QUEUED, 0);
		break;

	case HANDED_OVER_TO_NETWORK:
		/* assert something? */
		if (is_pending_write_protocol_A(req))
			/* this is what is dangerous about protocol A:
			 * pretend it was successfully written on the peer. */
			mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
						RQ_NET_SENT|RQ_NET_OK);
		else
			mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
		/* It is still not yet RQ_NET_DONE until the
		 * corresponding epoch barrier got acked as well,
		 * so we know what to dirty on connection loss. */
		break;

	case OOS_HANDED_TO_NETWORK:
		/* Was not set PENDING, no longer QUEUED, so is now DONE
		 * as far as this connection is concerned. */
		mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
		break;

	case CONNECTION_LOST_WHILE_PENDING:
		/* transfer log cleanup after connection loss */
		mod_rq_state(req, m,
				RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP,
				RQ_NET_DONE);
		break;

	case CONFLICT_RESOLVED:
		/* for superseded conflicting writes of multiple primaries,
		 * there is no need to keep anything in the tl, potential
		 * node crashes are covered by the activity log.
		 *
		 * If this request had been marked as RQ_POSTPONED before,
		 * it will actually not be completed, but "restarted",
		 * resubmitted from the retry worker context. */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK);
		break;

	case WRITE_ACKED_BY_PEER_AND_SIS:
		req->rq_state |= RQ_NET_SIS;
		fallthrough;
	case WRITE_ACKED_BY_PEER:
		/* Normal operation protocol C: successfully written on peer.
		 * During resync, even in protocol != C,
		 * we requested an explicit write ack anyways.
		 * Which means we cannot even assert anything here.
		 * Nothing more to do here.
		 * We want to keep the tl in place for all protocols, to cater
		 * for volatile write-back caches on lower level devices. */
		goto ack_common;
	case RECV_ACKED_BY_PEER:
		D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
		/* protocol B; pretends to be successfully written on peer.
		 * see also notes above in HANDED_OVER_TO_NETWORK about
		 * protocol != C */
	ack_common:
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
		break;

	case POSTPONE_WRITE:
		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
		/* If this node has already detected the write conflict, the
		 * worker will be waiting on misc_wait.  Wake it up once this
		 * request has completed locally.
		 */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_POSTPONED;
		if (req->i.waiting)
			wake_up(&device->misc_wait);
		/* Do not clear RQ_NET_PENDING. This request will make further
		 * progress via restart_conflicting_writes() or
		 * fail_postponed_requests(). Hopefully. */
		break;

	case NEG_ACKED:
		mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0);
		break;

	case FAIL_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;
		mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
		break;

	case RESTART_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		mod_rq_state(req, m,
				RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED,
				RQ_LOCAL_PENDING);

		rv = MR_READ;
		if (bio_data_dir(req->master_bio) == WRITE)
			rv = MR_WRITE;

		get_ldev(device); /* always succeeds in this call path */
		req->w.cb = w_restart_disk_io;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case RESEND:
		/* Simply complete (local only) READs. */
		if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
			mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
			break;
		}

		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
		   before the connection loss (B&C only); only P_BARRIER_ACK
		   (or the local completion?) was missing when we suspended.
		   Throwing them out of the TL here by pretending we got a BARRIER_ACK.
		   During connection handshake, we ensure that the peer was not rebooted. */
		if (!(req->rq_state & RQ_NET_OK)) {
			/* FIXME could this possibly be a req->w.cb == w_send_out_of_sync?
			 * in that case we must not set RQ_NET_PENDING. */

			mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
			if (req->w.cb) {
				/* w.cb expected to be w_send_dblock, or w_send_read_req */
				drbd_queue_work(&connection->sender_work,
						&req->w);
				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
			} /* else: FIXME can this happen? */
			break;
		}
		fallthrough;	/* to BARRIER_ACKED */

	case BARRIER_ACKED:
		/* barrier ack for READ requests does not make sense */
		if (!(req->rq_state & RQ_WRITE))
			break;

		if (req->rq_state & RQ_NET_PENDING) {
			/* barrier came in before all requests were acked.
			 * this is bad, because if the connection is lost now,
			 * we won't be able to clean them up... */
			drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n");
		}
		/* Allowed to complete requests, even while suspended.
		 * As this is called for all requests within a matching epoch,
		 * we need to filter, and only set RQ_NET_DONE for those that
		 * have actually been on the wire. */
		mod_rq_state(req, m, RQ_COMPLETION_SUSP,
				(req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
		break;

	case DATA_RECEIVED:
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
		break;

	case QUEUE_AS_DRBD_BARRIER:
		start_new_tl_epoch(connection);
		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
		break;
	}

	return rv;
}

/* we may do a local read if:
 * - we are consistent (of course),
 * - or we are generally inconsistent,
 *   BUT we are still/already IN SYNC for this area.
 *   since size may be bigger than BM_BLOCK_SIZE,
 *   we may need to check several bits.
 */
static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size)
{
	unsigned long sbnr, ebnr;
	sector_t esector, nr_sectors;

	if (device->state.disk == D_UP_TO_DATE)
		return true;
	if (device->state.disk != D_INCONSISTENT)
		return false;
	esector = sector + (size >> 9) - 1;
	nr_sectors = get_capacity(device->vdisk);
	D_ASSERT(device, sector  < nr_sectors);
	D_ASSERT(device, esector < nr_sectors);

	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	return drbd_bm_count_bits(device, sbnr, ebnr) == 0;
}
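
/*
 * Worked example (assuming the usual 4 KiB bitmap granularity, i.e. one
 * bit per 8 sectors): a 16 KiB read at sector 8 has
 * esector = 8 + 32 - 1 = 39, so sbnr = 8 >> 3 = 1 and ebnr = 39 >> 3 = 4;
 * bits 1..4 must all be clear (in sync) before we may serve the read
 * from an otherwise inconsistent local disk.
 */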

static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
		enum drbd_read_balancing rbm)
{
	int stripe_shift;

	switch (rbm) {
	case RB_CONGESTED_REMOTE:
		return false;
	case RB_LEAST_PENDING:
		return atomic_read(&device->local_cnt) >
			atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
	case RB_32K_STRIPING:  /* stripe_shift = 15 */
	case RB_64K_STRIPING:
	case RB_128K_STRIPING:
	case RB_256K_STRIPING:
	case RB_512K_STRIPING:
	case RB_1M_STRIPING:   /* stripe_shift = 20 */
		stripe_shift = (rbm - RB_32K_STRIPING + 15);
		return (sector >> (stripe_shift - 9)) & 1;
	case RB_ROUND_ROBIN:
		return test_and_change_bit(READ_BALANCE_RR, &device->flags);
	case RB_PREFER_REMOTE:
		return true;
	case RB_PREFER_LOCAL:
	default:
		return false;
	}
}
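
/*
 * Example for the striping modes (derived from the switch above):
 * RB_64K_STRIPING gives stripe_shift = 16, so the result is
 * (sector >> 7) & 1 -- consecutive 64 KiB stripes (128 sectors each)
 * alternate between being served locally and remotely.
 */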

/*
 * complete_conflicting_writes  -  wait for any conflicting write requests
 *
 * The write_requests tree contains all active write requests which we
 * currently know about.  Wait for any requests to complete which conflict with
 * the new one.
 *
 * Only way out: remove the conflicting intervals from the tree.
 */
static void complete_conflicting_writes(struct drbd_request *req)
{
	DEFINE_WAIT(wait);
	struct drbd_device *device = req->device;
	struct drbd_interval *i;
	sector_t sector = req->i.sector;
	int size = req->i.size;

	for (;;) {
		drbd_for_each_overlap(i, &device->write_requests, sector, size) {
			/* Ignore, if already completed to upper layers. */
			if (i->completed)
				continue;
			/* Handle the first found overlap.  After the schedule
			 * we have to restart the tree walk. */
			break;
		}
		if (!i)	/* if any */
			break;

		/* Indicate to wake up device->misc_wait on progress.  */
		prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
		i->waiting = true;
		spin_unlock_irq(&device->resource->req_lock);
		schedule();
		spin_lock_irq(&device->resource->req_lock);
	}
	finish_wait(&device->misc_wait, &wait);
}
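
/*
 * The loop above is an open-coded wait_event() (a description of the
 * existing control flow): mark the overlapping interval as waited-on,
 * register on device->misc_wait, drop req_lock so the conflicting
 * request can make progress, sleep, and restart the tree walk from
 * scratch after waking up.
 */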

/* called within req_lock */
static void maybe_pull_ahead(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct net_conf *nc;
	bool congested = false;
	enum drbd_on_congestion on_congestion;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
	rcu_read_unlock();
	if (on_congestion == OC_BLOCK ||
	    connection->agreed_pro_version < 96)
		return;

	if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
		return; /* nothing to do ... */

	/* If I don't even have good local storage, we cannot reasonably try
	 * to pull ahead of the peer. We also need the local reference to make
	 * sure device->act_log is there.
	 */
	if (!get_ldev_if_state(device, D_UP_TO_DATE))
		return;

	if (nc->cong_fill &&
	    atomic_read(&device->ap_in_flight) >= nc->cong_fill) {
		drbd_info(device, "Congestion-fill threshold reached\n");
		congested = true;
	}

	if (device->act_log->used >= nc->cong_extents) {
		drbd_info(device, "Congestion-extents threshold reached\n");
		congested = true;
	}

	if (congested) {
		/* start a new epoch for non-mirrored writes */
		start_new_tl_epoch(first_peer_device(device)->connection);

		if (on_congestion == OC_PULL_AHEAD)
			_drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL);
		else  /* nc->on_congestion == OC_DISCONNECT */
			_drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL);
	}
	put_ldev(device);
}
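
/*
 * The thresholds above correspond to the DRBD net configuration options
 * (sketch of a drbd.conf net section; option names as in the DRBD 8.4
 * manual pages, values arbitrary):
 *
 *	net {
 *		on-congestion		pull-ahead;	# or: block, disconnect
 *		congestion-fill		1G;		# cf. nc->cong_fill
 *		congestion-extents	1000;		# cf. nc->cong_extents
 *	}
 *
 * With pull-ahead, reaching either threshold switches this device to
 * C_AHEAD: new writes are no longer mirrored, only marked out-of-sync,
 * until the peer has caught up again.
 */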

/* If this returns false, and req->private_bio is still set,
 * this should be submitted locally.
 *
 * If it returns false, but req->private_bio is not set,
 * we do not have access to good data :(
 *
 * Otherwise, this destroys req->private_bio, if any,
 * and returns true.
 */
static bool do_remote_read(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	enum drbd_read_balancing rbm;

	if (req->private_bio) {
		if (!drbd_may_do_local_read(device,
					req->i.sector, req->i.size)) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
	}

	if (device->state.pdsk != D_UP_TO_DATE)
		return false;

	if (req->private_bio == NULL)
		return true;

	/* TODO: improve read balancing decisions, take into account drbd
	 * protocol, pending requests etc. */

	rcu_read_lock();
	rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing;
	rcu_read_unlock();

	if (rbm == RB_PREFER_LOCAL && req->private_bio)
		return false; /* submit locally */

	if (remote_due_to_read_balancing(device, req->i.sector, rbm)) {
		if (req->private_bio) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
		return true;
	}

	return false;
}

bool drbd_should_do_remote(union drbd_dev_state s)
{
	return s.pdsk == D_UP_TO_DATE ||
		(s.pdsk >= D_INCONSISTENT &&
		 s.conn >= C_WF_BITMAP_T &&
		 s.conn < C_AHEAD);
	/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
	   states. */
}
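
/*
 * Decision sketch for drbd_should_do_remote() (illustrative summary,
 * not exhaustive over all state enum values):
 *
 *	pdsk			conn				replicate?
 *	D_UP_TO_DATE		any				yes
 *	>= D_INCONSISTENT	C_WF_BITMAP_T <= conn < C_AHEAD	yes
 *	anything else		-				no
 *
 * The second row keeps a resync target supplied with new writes it
 * would otherwise miss while the bitmap based resync is running.
 */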

static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
{
	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
	   since we enter state C_AHEAD only if proto >= 96 */
}

/* returns the number of connections (== 1, for drbd 8.4)
 * expected to actually write this data,
 * which does NOT include those that we are L_AHEAD for. */
static int drbd_process_write_request(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	int remote, send_oos;

	remote = drbd_should_do_remote(device->state);
	send_oos = drbd_should_send_out_of_sync(device->state);

	/* Need to replicate writes.  Unless it is an empty flush,
	 * which is better mapped to a DRBD P_BARRIER packet,
	 * also for drbd wire protocol compatibility reasons.
	 * If this was a flush, just start a new epoch.
	 * Unless the current epoch was empty anyway, or we are not currently
	 * replicating, in which case there is no point. */
	if (unlikely(req->i.size == 0)) {
		/* The only size==0 bios we expect are empty flushes. */
		D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
		if (remote)
			_req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device);
		return remote;
	}

	if (!remote && !send_oos)
		return 0;

	D_ASSERT(device, !(remote && send_oos));

	if (remote) {
		_req_mod(req, TO_BE_SENT, peer_device);
		_req_mod(req, QUEUE_FOR_NET_WRITE, peer_device);
	} else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size))
		_req_mod(req, QUEUE_FOR_SEND_OOS, peer_device);

	return remote;
}
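
/*
 * Note the invariant asserted above: a request is either queued for the
 * peer (remote) or announced as out-of-sync (send_oos), never both; the
 * state machine guarantees that combination cannot occur.  The typical
 * caller pattern is (cf. drbd_send_and_submit() below):
 *
 *	if (!drbd_process_write_request(req))
 *		no_remote = true;
 *
 * i.e. a return value of 0 means this write completes based on local
 * disk I/O alone.
 */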

static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
{
	int err = drbd_issue_discard_or_zero_out(req->device,
				req->i.sector, req->i.size >> 9, flags);
	if (err)
		req->private_bio->bi_status = BLK_STS_IOERR;
	bio_endio(req->private_bio);
}

static void
drbd_submit_req_private_bio(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	struct bio *bio = req->private_bio;
	unsigned int type;

	if (bio_op(bio) != REQ_OP_READ)
		type = DRBD_FAULT_DT_WR;
	else if (bio->bi_opf & REQ_RAHEAD)
		type = DRBD_FAULT_DT_RA;
	else
		type = DRBD_FAULT_DT_RD;

	/* State may have changed since we grabbed our reference on the
	 * ->ldev member. Double check, and short-circuit to endio.
	 * In case the last activity log transaction failed to get on
	 * stable storage, and this is a WRITE, we may not even submit
	 * this bio. */
	if (get_ldev(device)) {
		if (drbd_insert_fault(device, type))
			bio_io_error(bio);
		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
			drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
			    ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
		else if (bio_op(bio) == REQ_OP_DISCARD)
			drbd_process_discard_or_zeroes_req(req, EE_TRIM);
		else
			submit_bio_noacct(bio);
		put_ldev(device);
	} else
		bio_io_error(bio);
}

static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
{
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&req->tl_requests, &device->submit.writes);
	list_add_tail(&req->req_pending_master_completion,
			&device->pending_master_completion[1 /* WRITE */]);
	spin_unlock_irq(&device->resource->req_lock);
	queue_work(device->submit.wq, &device->submit.worker);
	/* do_submit() may sleep internally on al_wait, too */
	wake_up(&device->al_wait);
}
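
/*
 * This is the producer half of a plain workqueue handoff: requests are
 * appended to device->submit.writes under req_lock, then the worker is
 * kicked.  The consumer side is do_submit() below, which splices the
 * list off under the same lock:
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	list_splice_tail_init(&device->submit.writes, &incoming);
 *	spin_unlock_irq(&device->resource->req_lock);
 */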

/* returns the new drbd_request pointer, if the caller is expected to
 * drbd_send_and_submit() it (to save latency), or NULL if we queued the
 * request on the submitter thread.
 * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
 */
static struct drbd_request *
drbd_request_prepare(struct drbd_device *device, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct drbd_request *req;

	/* allocate outside of all locks */
	req = drbd_req_new(device, bio);
	if (!req) {
		dec_ap_bio(device);
		/* Only pass the error to the upper layers.
		 * If the user cannot handle io errors, that's not our business. */
		drbd_err(device, "could not kmalloc() req\n");
		bio->bi_status = BLK_STS_RESOURCE;
		bio_endio(bio);
		return ERR_PTR(-ENOMEM);
	}

	/* Update disk stats */
	req->start_jif = bio_start_io_acct(req->master_bio);

	if (get_ldev(device)) {
		req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
						   bio, GFP_NOIO,
						   &drbd_io_bio_set);
		req->private_bio->bi_private = req;
		req->private_bio->bi_end_io = drbd_request_endio;
	}

	/* process discards always from our submitter thread */
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
	    bio_op(bio) == REQ_OP_DISCARD)
		goto queue_for_submitter_thread;

	if (rw == WRITE && req->private_bio && req->i.size
	&& !test_bit(AL_SUSPENDED, &device->flags)) {
		if (!drbd_al_begin_io_fastpath(device, &req->i))
			goto queue_for_submitter_thread;
		req->rq_state |= RQ_IN_ACT_LOG;
		req->in_actlog_jif = jiffies;
	}
	return req;

 queue_for_submitter_thread:
	atomic_inc(&device->ap_actlog_cnt);
	drbd_queue_write(device, req);
	return NULL;
}
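
/*
 * Summary of the fast/slow path split in drbd_request_prepare():
 *  - reads are returned to the caller and submitted directly;
 *  - discards and zero-out requests always go via the submitter thread;
 *  - writes are returned to the caller only if all affected activity
 *    log extents are already hot (drbd_al_begin_io_fastpath() succeeded)
 *    and the AL is not suspended; otherwise they are queued, so only the
 *    submitter thread ever blocks on an AL transaction.
 */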

/* Require at least one path to current data.
 * We don't want to allow writes on C_STANDALONE D_INCONSISTENT:
 * We would not be able to read back what was written,
 * we would not have bumped the data generation uuids,
 * we would cause data divergence for all the wrong reasons.
 *
 * If we don't see at least one D_UP_TO_DATE, we will fail this request,
 * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO,
 * and queues for retry later.
 */
static bool may_do_writes(struct drbd_device *device)
{
	const union drbd_dev_state s = device->state;
	return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
}
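
/*
 * Truth table for the check above (one D_UP_TO_DATE copy suffices):
 *
 *	disk		pdsk		writes allowed?
 *	D_UP_TO_DATE	any		yes
 *	otherwise	D_UP_TO_DATE	yes (data current on the peer only)
 *	otherwise	otherwise	no -> EIO, or suspend and retry
 */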

struct drbd_plug_cb {
	struct blk_plug_cb cb;
	struct drbd_request *most_recent_req;
	/* do we need more? */
};

static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
	struct drbd_resource *resource = plug->cb.data;
	struct drbd_request *req = plug->most_recent_req;

	kfree(cb);
	if (!req)
		return;

	spin_lock_irq(&resource->req_lock);
	/* In case the sender did not process it yet, raise the flag to
	 * have it followed with P_UNPLUG_REMOTE just after. */
	req->rq_state |= RQ_UNPLUG;
	/* but also queue a generic unplug */
	drbd_queue_unplug(req->device);
	kref_put(&req->kref, drbd_req_destroy);
	spin_unlock_irq(&resource->req_lock);
}

static struct drbd_plug_cb *drbd_check_plugged(struct drbd_resource *resource)
{
	/* A lot of text to say
	 * return (struct drbd_plug_cb *)blk_check_plugged(); */
	struct drbd_plug_cb *plug;
	struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));

	if (cb)
		plug = container_of(cb, struct drbd_plug_cb, cb);
	else
		plug = NULL;
	return plug;
}
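
/*
 * blk_check_plugged() attaches (or finds) a per-task unplug callback
 * that fires when the current blk_plug is flushed.  Sketch of the
 * surrounding block layer idiom (generic; cf. submit_fast_path()):
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	...submit several requests, each calling drbd_check_plugged()...
 *	blk_finish_plug(&plug);
 *
 * At blk_finish_plug() time, drbd_unplug() runs and tags the most
 * recent request with RQ_UNPLUG, so the sender follows it up with
 * P_UNPLUG_REMOTE.
 */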

static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
{
	struct drbd_request *tmp = plug->most_recent_req;
	/* Will be sent to some peer.
	 * Remember to tag it with UNPLUG_REMOTE on unplug */
	kref_get(&req->kref);
	plug->most_recent_req = req;
	if (tmp)
		kref_put(&tmp->kref, drbd_req_destroy);
}

static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
	struct drbd_resource *resource = device->resource;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	const int rw = bio_data_dir(req->master_bio);
	struct bio_and_error m = { NULL, };
	bool no_remote = false;
	bool submit_private_bio = false;

	spin_lock_irq(&resource->req_lock);
	if (rw == WRITE) {
		/* This may temporarily give up the req_lock,
		 * but will re-acquire it before it returns here.
		 * Needs to be before the check on drbd_suspended() */
		complete_conflicting_writes(req);
		/* no more giving up req_lock from now on! */

		/* check for congestion, and potentially stop sending
		 * full data updates, but start sending "dirty bits" only. */
		maybe_pull_ahead(device);
	}

	if (drbd_suspended(device)) {
		/* push back and retry: */
		req->rq_state |= RQ_POSTPONED;
		if (req->private_bio) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
		goto out;
	}

	/* We fail READ early, if we can not serve it.
	 * We must do this before req is registered on any lists.
	 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
	if (rw != WRITE) {
		if (!do_remote_read(req) && !req->private_bio)
			goto nodata;
	}

	/* which transfer log epoch does this belong to? */
	req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr);

	/* no point in adding empty flushes to the transfer log,
	 * they are mapped to drbd barriers already. */
	if (likely(req->i.size != 0)) {
		if (rw == WRITE)
			first_peer_device(device)->connection->current_tle_writes++;

		list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log);
	}

	if (rw == WRITE) {
		if (req->private_bio && !may_do_writes(device)) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
			goto nodata;
		}
		if (!drbd_process_write_request(req))
			no_remote = true;
	} else {
		/* We either have a private_bio, or we can read from remote.
		 * Otherwise we had done the goto nodata above. */
		if (req->private_bio == NULL) {
			_req_mod(req, TO_BE_SENT, peer_device);
			_req_mod(req, QUEUE_FOR_NET_READ, peer_device);
		} else
			no_remote = true;
	}

	if (!no_remote) {
		struct drbd_plug_cb *plug = drbd_check_plugged(resource);
		if (plug)
			drbd_update_plug(plug, req);
	}

	/* If it took the fast path in drbd_request_prepare, add it here.
	 * The slow path has added it already. */
	if (list_empty(&req->req_pending_master_completion))
		list_add_tail(&req->req_pending_master_completion,
			&device->pending_master_completion[rw == WRITE]);
	if (req->private_bio) {
		/* needs to be marked within the same spinlock */
		req->pre_submit_jif = jiffies;
		list_add_tail(&req->req_pending_local,
			&device->pending_completion[rw == WRITE]);
		_req_mod(req, TO_BE_SUBMITTED, NULL);
		/* but we need to give up the spinlock to submit */
		submit_private_bio = true;
	} else if (no_remote) {
nodata:
		if (drbd_ratelimit())
			drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
					(unsigned long long)req->i.sector, req->i.size >> 9);
		/* A write may have been queued for send_oos, however.
		 * So we can not simply free it, we must go through drbd_req_put_completion_ref() */
	}

out:
	drbd_req_put_completion_ref(req, &m, 1);
	spin_unlock_irq(&resource->req_lock);

	/* Even though above is a kref_put(), this is safe.
	 * As long as we still need to submit our private bio,
	 * we hold a completion ref, and the request cannot disappear.
	 * If however this request did not even have a private bio to submit
	 * (e.g. remote read), req may already be invalid now.
	 * That's why we cannot check on req->private_bio. */
	if (submit_private_bio)
		drbd_submit_req_private_bio(req);
	if (m.bio)
		complete_master_bio(device, &m);
}

void __drbd_make_request(struct drbd_device *device, struct bio *bio)
{
	struct drbd_request *req = drbd_request_prepare(device, bio);
	if (IS_ERR_OR_NULL(req))
		return;
	drbd_send_and_submit(device, req);
}

static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
{
	struct blk_plug plug;
	struct drbd_request *req, *tmp;

	blk_start_plug(&plug);
	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
		const int rw = bio_data_dir(req->master_bio);

		if (rw == WRITE /* rw != WRITE should not even end up here! */
		&& req->private_bio && req->i.size
		&& !test_bit(AL_SUSPENDED, &device->flags)) {
			if (!drbd_al_begin_io_fastpath(device, &req->i))
				continue;

			req->rq_state |= RQ_IN_ACT_LOG;
			req->in_actlog_jif = jiffies;
			atomic_dec(&device->ap_actlog_cnt);
		}

		list_del_init(&req->tl_requests);
		drbd_send_and_submit(device, req);
	}
	blk_finish_plug(&plug);
}

static bool prepare_al_transaction_nonblock(struct drbd_device *device,
					    struct list_head *incoming,
					    struct list_head *pending,
					    struct list_head *later)
{
	struct drbd_request *req;
	int wake = 0;
	int err;

	spin_lock_irq(&device->al_lock);
	while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
		err = drbd_al_begin_io_nonblock(device, &req->i);
		if (err == -ENOBUFS)
			break;
		if (err == -EBUSY)
			wake = 1;
		if (err)
			list_move_tail(&req->tl_requests, later);
		else
			list_move_tail(&req->tl_requests, pending);
	}
	spin_unlock_irq(&device->al_lock);
	if (wake)
		wake_up(&device->al_wait);
	return !list_empty(pending);
}
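
/*
 * Per-request outcome of drbd_al_begin_io_nonblock() in the loop above
 * (mapping inferred from the error handling here and in do_submit()):
 *
 *	0	 AL slot claimed        -> "pending", submit after commit
 *	-EBUSY	 extent locked (resync) -> "later", and wake al_wait
 *	-ENOBUFS no free AL slots       -> stop, rest stays on "incoming"
 */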

static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
{
	struct blk_plug plug;
	struct drbd_request *req;

	blk_start_plug(&plug);
	while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
		req->rq_state |= RQ_IN_ACT_LOG;
		req->in_actlog_jif = jiffies;
		atomic_dec(&device->ap_actlog_cnt);
		list_del_init(&req->tl_requests);
		drbd_send_and_submit(device, req);
	}
	blk_finish_plug(&plug);
}

void do_submit(struct work_struct *ws)
{
	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
	LIST_HEAD(incoming);	/* from drbd_make_request() */
	LIST_HEAD(pending);	/* to be submitted after next AL-transaction commit */
	LIST_HEAD(busy);	/* blocked by resync requests */

	/* grab new incoming requests */
	spin_lock_irq(&device->resource->req_lock);
	list_splice_tail_init(&device->submit.writes, &incoming);
	spin_unlock_irq(&device->resource->req_lock);

	for (;;) {
		DEFINE_WAIT(wait);

		/* move used-to-be-busy back to front of incoming */
		list_splice_init(&busy, &incoming);
		submit_fast_path(device, &incoming);
		if (list_empty(&incoming))
			break;

		for (;;) {
			prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);

			list_splice_init(&busy, &incoming);
			prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
			if (!list_empty(&pending))
				break;

			schedule();

			/* If all currently "hot" activity log extents are kept busy by
			 * incoming requests, we still must not totally starve new
			 * requests to "cold" extents.
			 * Something left on &incoming means there had not been
			 * enough update slots available, and the activity log
			 * has been marked as "starving".
			 *
			 * Try again now, without looking for new requests,
			 * effectively blocking all new requests until we made
			 * at least _some_ progress with what we currently have.
			 */
			if (!list_empty(&incoming))
				continue;

			/* Nothing moved to pending, but nothing left
			 * on incoming: all moved to busy!
			 * Grab new and iterate. */
			spin_lock_irq(&device->resource->req_lock);
			list_splice_tail_init(&device->submit.writes, &incoming);
			spin_unlock_irq(&device->resource->req_lock);
		}
		finish_wait(&device->al_wait, &wait);

		/* If the transaction was full, before all incoming requests
		 * had been processed, skip ahead to commit, and iterate
		 * without splicing in more incoming requests from upper layers.
		 *
		 * Else, if all incoming have been processed,
		 * they have become either "pending" (to be submitted after
		 * next transaction commit) or "busy" (blocked by resync).
		 *
		 * Maybe more was queued, while we prepared the transaction?
		 * Try to stuff those into this transaction as well.
		 * Be strictly non-blocking here,
		 * we already have something to commit.
		 *
		 * Commit if we don't make any more progress.
		 */

		while (list_empty(&incoming)) {
			LIST_HEAD(more_pending);
			LIST_HEAD(more_incoming);
			bool made_progress;

			/* It is ok to look outside the lock,
			 * it's only an optimization anyway */
			if (list_empty(&device->submit.writes))
				break;

			spin_lock_irq(&device->resource->req_lock);
			list_splice_tail_init(&device->submit.writes, &more_incoming);
			spin_unlock_irq(&device->resource->req_lock);

			if (list_empty(&more_incoming))
				break;

			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);

			list_splice_tail_init(&more_pending, &pending);
			list_splice_tail_init(&more_incoming, &incoming);
			if (!made_progress)
				break;
		}

		drbd_al_begin_io_commit(device);
		send_and_submit_pending(device, &pending);
	}
}

void drbd_submit_bio(struct bio *bio)
{
	struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;

	bio = bio_split_to_limits(bio);
	if (!bio)
		return;

	/*
	 * what we "blindly" assume:
	 */
	D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));

	inc_ap_bio(device);
	__drbd_make_request(device, bio);
}
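
/*
 * bio_split_to_limits() may split the bio against the queue limits,
 * re-submitting the remainder itself, and returns the piece to handle
 * now, or NULL when nothing is left for the caller to process (e.g.
 * the bio was already completed with an error).  Semantics as
 * understood by the editor, see block/blk-merge.c; hence the early
 * return above.
 */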

static bool net_timeout_reached(struct drbd_request *net_req,
		struct drbd_connection *connection,
		unsigned long now, unsigned long ent,
		unsigned int ko_count, unsigned int timeout)
{
	struct drbd_device *device = net_req->device;

	if (!time_after(now, net_req->pre_send_jif + ent))
		return false;

	if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
		return false;

	if (net_req->rq_state & RQ_NET_PENDING) {
		drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
			jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
		return true;
	}

	/* We received an ACK already (or are using protocol A),
	 * but are waiting for the epoch closing barrier ack.
	 * Check if we sent the barrier already.  We should not blame the peer
	 * for being unresponsive, if we did not even ask it yet. */
	if (net_req->epoch == connection->send.current_epoch_nr) {
		drbd_warn(device,
			"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
			jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
		return false;
	}

	/* Worst case: we may have been blocked for whatever reason, then
	 * suddenly are able to send a lot of requests (and epoch separating
	 * barriers) in quick succession.
	 * The timestamp of the net_req may be much too old and not correspond
	 * to the sending time of the relevant unack'ed barrier packet, so it
	 * would trigger a spurious timeout.  The latest barrier packet may
	 * have a too recent timestamp to trigger the timeout, so we would
	 * potentially miss a timeout.  Right now we don't have a place to
	 * conveniently store these timestamps.
	 * But in this particular situation, the application requests are still
	 * completed to upper layers, DRBD should still "feel" responsive.
	 * No need yet to kill this connection, it may still recover.
	 * If not, eventually we will have queued enough into the network for
	 * us to block. From that point of view, the timestamp of the last sent
	 * barrier packet is relevant enough.
	 */
	if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
		drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
			connection->send.last_sent_barrier_jif, now,
			jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
		return true;
	}
	return false;
}
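
/*
 * Worked example for the effective network timeout "ent" (values
 * illustrative): with timeout = 60 (6.0s, configured in 0.1s units)
 * and ko-count = 7,
 *
 *	ent = timeout * HZ/10 * ko_count	(in jiffies)
 *	    = 6s * 7 = 42s
 *
 * so a request unacknowledged for more than 42 seconds, outside the
 * grace interval after a reconnect, is treated as a peer failure
 * (connection goes to C_TIMEOUT in request_timer_fn() below).
 */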

/* A request is considered timed out, if
 * - we have some effective timeout from the configuration,
 *   with some state restrictions applied,
 * - the oldest request is waiting for a response from the network
 *   resp. the local disk,
 * - the oldest request is in fact older than the effective timeout,
 * - the connection was established (resp. disk was attached)
 *   for longer than the timeout already.
 * Note that for 32bit jiffies and very stable connections/disks,
 * we may have a wrap around, which is caught by
 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
 *
 * Side effect: once per 32bit wrap-around interval, which means every
 * ~198 days with 250 HZ, we have a window where the timeout would need
 * to expire twice (worst case) to become effective. Good enough.
 */
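
/*
 * The "~198 days" above: with HZ = 250, the 32bit jiffies counter wraps
 * after 2^32 / 250 ~= 17179869 seconds ~= 198.8 days.  time_in_range()
 * is the wrap-safe interval check:
 *
 *	time_in_range(now, start, start + len)
 *		== time_after_eq(now, start) &&
 *		   time_before_eq(now, start + len)
 */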

void request_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, request_timer);
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
	struct net_conf *nc;
	unsigned long oldest_submit_jif;
	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
	unsigned long now;
	unsigned int ko_count = 0, timeout = 0;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
		ko_count = nc->ko_count;
		timeout = nc->timeout;
	}

	if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
		dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
		put_ldev(device);
	}
	rcu_read_unlock();

	ent = timeout * HZ/10 * ko_count;
	et = min_not_zero(dt, ent);

	if (!et)
		return; /* Recurring timer stopped */

	now = jiffies;
	nt = now + et;

	spin_lock_irq(&device->resource->req_lock);
	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);

	/* maybe the oldest request waiting for the peer is in fact still
	 * blocking in tcp sendmsg.  That's ok, though, that's handled via the
	 * socket send timeout, requesting a ping, and bumping ko-count in
	 * we_should_drop_the_connection().
	 */

	/* check the oldest request we successfully sent,
	 * but which is still waiting for an ACK. */
	req_peer = connection->req_ack_pending;

	/* if we don't have such a request (e.g. protocol A),
	 * check the oldest request which is still waiting on its epoch
	 * closing barrier ack. */
	if (!req_peer)
		req_peer = connection->req_not_net_done;

	/* evaluate the oldest peer request only in one timer! */
	if (req_peer && req_peer->device != device)
		req_peer = NULL;

	/* do we have something to evaluate? */
	if (req_peer == NULL && req_write == NULL && req_read == NULL)
		goto out;

	oldest_submit_jif =
		(req_write && req_read)
		? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
		  ? req_write->pre_submit_jif : req_read->pre_submit_jif )
		: req_write ? req_write->pre_submit_jif
		: req_read ? req_read->pre_submit_jif : now;

	if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
		_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);

	if (dt && oldest_submit_jif != now &&
		 time_after(now, oldest_submit_jif + dt) &&
		!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
	}

	/* Reschedule the timer for the nearest not already expired timeout.
	 * Fallback to now + min(effective network timeout, disk timeout). */
	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
		? req_peer->pre_send_jif + ent : now + et;
	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
		? oldest_submit_jif + dt : now + et;
	nt = time_before(ent, dt) ? ent : dt;
out:
	spin_unlock_irq(&device->resource->req_lock);
	mod_timer(&device->request_timer, nt);
}