// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_req.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


 */

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"


static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);

static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
{
	struct drbd_request *req;

	req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
	if (!req)
		return NULL;
	memset(req, 0, sizeof(*req));

	drbd_req_make_private_bio(req, bio_src);
	req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
		      | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
		      | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
		      | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
	req->device = device;
	req->master_bio = bio_src;
	req->epoch = 0;

	drbd_clear_interval(&req->i);
	req->i.sector     = bio_src->bi_iter.bi_sector;
	req->i.size       = bio_src->bi_iter.bi_size;
	req->i.local = true;
	req->i.waiting = false;

	INIT_LIST_HEAD(&req->tl_requests);
	INIT_LIST_HEAD(&req->w.list);
	INIT_LIST_HEAD(&req->req_pending_master_completion);
	INIT_LIST_HEAD(&req->req_pending_local);

	/* one reference to be put by __drbd_make_request */
	atomic_set(&req->completion_ref, 1);
	/* one kref as long as completion_ref > 0 */
	kref_init(&req->kref);
	return req;
}
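
/* Reference model set up above, illustrated for a mirrored WRITE:
 * completion_ref counts the reasons the master bio must not yet be
 * completed (initially 1, put by __drbd_make_request; local and network
 * pending states each add one in mod_rq_state), while kref counts the
 * reasons the request object itself must stay allocated.  When
 * completion_ref drops to zero, drbd_req_put_completion_ref() completes
 * the master bio and drops the final kref, which calls
 * drbd_req_destroy().  Roughly:
 *
 *	completion_ref: 1 (new) + 1 (RQ_LOCAL_PENDING) + 1 (RQ_NET_PENDING)
 *	kref:           1 (while completion_ref > 0) + 1 (RQ_EXP_BARR_ACK)
 */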

static void drbd_remove_request_interval(struct rb_root *root,
					 struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	struct drbd_interval *i = &req->i;

	drbd_remove_interval(root, i);

	/* Wake up any processes waiting for this request to complete.  */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

void drbd_req_destroy(struct kref *kref)
{
	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
	struct drbd_device *device = req->device;
	const unsigned s = req->rq_state;

	if ((req->master_bio && !(s & RQ_POSTPONED)) ||
		atomic_read(&req->completion_ref) ||
		(s & RQ_LOCAL_PENDING) ||
		((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
		drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
				s, atomic_read(&req->completion_ref));
		return;
	}

	/* If called from mod_rq_state (expected normal case) or
	 * drbd_send_and_submit (the less likely normal path), this holds the
	 * req_lock, and req->tl_requests will typically be on ->transfer_log,
	 * though it may still be empty (never added to the transfer log).
	 *
	 * If called from do_retry(), we do NOT hold the req_lock, but we are
	 * still allowed to unconditionally list_del(&req->tl_requests),
	 * because it will be on a local on-stack list only. */
	list_del_init(&req->tl_requests);

	/* finally remove the request from the conflict detection
	 * respectively block_id verification interval tree. */
	if (!drbd_interval_empty(&req->i)) {
		struct rb_root *root;

		if (s & RQ_WRITE)
			root = &device->write_requests;
		else
			root = &device->read_requests;
		drbd_remove_request_interval(root, req);
	} else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
		drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
			s, (unsigned long long)req->i.sector, req->i.size);

	/* if it was a write, we may have to set the corresponding
	 * bit(s) out-of-sync first. If it had a local part, we need to
	 * release the reference to the activity log. */
	if (s & RQ_WRITE) {
		/* Set out-of-sync unless both OK flags are set
		 * (local only or remote failed).
		 * Other places where we set out-of-sync:
		 * READ with local io-error */

		/* There is a special case:
		 * we may notice late that IO was suspended,
		 * and postpone, or schedule for retry, a write,
		 * before it even was submitted or sent.
		 * In that case we do not want to touch the bitmap at all.
		 */
		if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
			if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
				drbd_set_out_of_sync(device, req->i.sector, req->i.size);

			if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
				drbd_set_in_sync(device, req->i.sector, req->i.size);
		}

		/* one might be tempted to move the drbd_al_complete_io
		 * to the local io completion callback drbd_request_endio.
		 * but, if this was a mirror write, we may only
		 * drbd_al_complete_io after this is RQ_NET_DONE,
		 * otherwise the extent could be dropped from the al
		 * before it has actually been written on the peer.
		 * if we crash before our peer knows about the request,
		 * but after the extent has been dropped from the al,
		 * we would forget to resync the corresponding extent.
		 */
		if (s & RQ_IN_ACT_LOG) {
			if (get_ldev_if_state(device, D_FAILED)) {
				drbd_al_complete_io(device, &req->i);
				put_ldev(device);
			} else if (__ratelimit(&drbd_ratelimit_state)) {
				drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
					 "but my Disk seems to have failed :(\n",
					 (unsigned long long) req->i.sector, req->i.size);
			}
		}
	}

	mempool_free(req, &drbd_request_mempool);
}
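
/* The bitmap decision above, spelled out:
 *
 *	RQ_LOCAL_OK	RQ_NET_OK	action on destroy
 *	-----------	---------	-----------------
 *	set		set		nothing (plus set-in-sync if RQ_NET_SIS)
 *	set		clear		drbd_set_out_of_sync() - peer missed this write
 *	clear		set		drbd_set_out_of_sync() - local disk missed it
 *	clear		clear		drbd_set_out_of_sync() - resync must cover it
 *
 * Only a write that was postponed before it was ever submitted or sent
 * (pure RQ_POSTPONED, no local or net bits) skips the bitmap entirely.
 */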

static void wake_all_senders(struct drbd_connection *connection)
{
	wake_up(&connection->sender_work.q_wait);
}

/* must hold resource->req_lock */
void start_new_tl_epoch(struct drbd_connection *connection)
{
	/* no point in closing an epoch, if it is empty, anyway. */
	if (connection->current_tle_writes == 0)
		return;

	connection->current_tle_writes = 0;
	atomic_inc(&connection->current_tle_nr);
	wake_all_senders(connection);
}
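
/* Epoch (write barrier) bookkeeping, by example: writes tagged with the
 * same req->epoch (the current_tle_nr at submit time) may be written in
 * any order on the peer; a P_BARRIER separates consecutive epochs.
 * Three writes, then start_new_tl_epoch(), then two more writes, yields
 * epochs {w1,w2,w3} and {w4,w5}; the peer answers each P_BARRIER with a
 * P_BARRIER_ACK once it has finished the writes of that epoch, which is
 * what the BARRIER_ACKED event below relies on.
 */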


void complete_master_bio(struct drbd_device *device,
		struct bio_and_error *m)
{
	if (unlikely(m->error))
		m->bio->bi_status = errno_to_blk_status(m->error);
	bio_endio(m->bio);
	dec_ap_bio(device);
}


/* Helper for __req_mod().
 * Set m->bio to the master bio, if it is fit to be completed,
 * or leave it alone (it is initialized to NULL in __req_mod),
 * if it has already been completed, or cannot be completed yet.
 * If m->bio is set, the error status to be returned is placed in m->error.
 */
static
void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
{
	const unsigned s = req->rq_state;
	struct drbd_device *device = req->device;
	int error, ok;

	/* we must not complete the master bio, while it is
	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
	 *	not yet acknowledged by the peer
	 *	not yet completed by the local io subsystem
	 * these flags may get cleared in any order by
	 *	the worker,
	 *	the receiver,
	 *	the bio_endio completion callbacks.
	 */
	if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
	    (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
	    (s & RQ_COMPLETION_SUSP)) {
		drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
		return;
	}

	if (!req->master_bio) {
		drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
		return;
	}

	/*
	 * figure out whether to report success or failure.
	 *
	 * report success when at least one of the operations succeeded.
	 * or, to put it the other way,
	 * only report failure when both operations failed.
	 *
	 * what to do about the failures is handled elsewhere.
	 * what we need to do here is just: complete the master_bio.
	 *
	 * local completion error, if any, has been stored as ERR_PTR
	 * in private_bio within drbd_request_endio.
	 */
	ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
	error = PTR_ERR(req->private_bio);

	/* Before we can signal completion to the upper layers,
	 * we may need to close the current transfer log epoch.
	 * We are within the request lock, so we can simply compare
	 * the request epoch number with the current transfer log
	 * epoch number.  If they match, increase the current_tle_nr,
	 * and reset the transfer log epoch write_cnt.
	 */
	if (op_is_write(bio_op(req->master_bio)) &&
	    req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr))
		start_new_tl_epoch(first_peer_device(device)->connection);

	/* Update disk stats */
	bio_end_io_acct(req->master_bio, req->start_jif);

	/* If READ failed,
	 * have it be pushed back to the retry work queue,
	 * so it will re-enter __drbd_make_request(),
	 * and be re-assigned to a suitable local or remote path,
	 * or failed if we do not have access to good data anymore.
	 *
	 * Unless it was failed early by __drbd_make_request(),
	 * because no path was available, in which case
	 * it was not even added to the transfer_log.
	 *
	 * read-ahead may fail, and will not be retried.
	 *
	 * WRITE should have used all available paths already.
	 */
	if (!ok &&
	    bio_op(req->master_bio) == REQ_OP_READ &&
	    !(req->master_bio->bi_opf & REQ_RAHEAD) &&
	    !list_empty(&req->tl_requests))
		req->rq_state |= RQ_POSTPONED;

	if (!(req->rq_state & RQ_POSTPONED)) {
		m->error = ok ? 0 : (error ?: -EIO);
		m->bio = req->master_bio;
		req->master_bio = NULL;
		/* We leave it in the tree, to be able to verify later
		 * write-acks in protocol != C during resync.
		 * But we mark it as "complete", so it won't be counted as
		 * conflict in a multi-primary setup. */
		req->i.completed = true;
	}

	if (req->i.waiting)
		wake_up(&device->misc_wait);

	/* Either we are about to complete to upper layers,
	 * or we will restart this request.
	 * In either case, the request object will be destroyed soon,
	 * so better remove it from all lists. */
	list_del_init(&req->req_pending_master_completion);
}

/* still holds resource->req_lock */
static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
{
	struct drbd_device *device = req->device;
	D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));

	if (!put)
		return;

	if (!atomic_sub_and_test(put, &req->completion_ref))
		return;

	drbd_req_complete(req, m);

	/* local completion may still come in later,
	 * we need to keep the req object around. */
	if (req->rq_state & RQ_LOCAL_ABORTED)
		return;

	if (req->rq_state & RQ_POSTPONED) {
		/* don't destroy the req object just yet,
		 * but queue it for retry */
		drbd_restart_request(req);
		return;
	}

	kref_put(&req->kref, drbd_req_destroy);
}
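
/* Life cycle of completion_ref, by example (protocol C write with a
 * healthy local disk):
 *
 *	drbd_req_new()                       completion_ref = 1
 *	TO_BE_SENT       -> RQ_NET_PENDING   completion_ref = 2
 *	TO_BE_SUBMITTED  -> RQ_LOCAL_PENDING completion_ref = 3
 *	QUEUE_FOR_NET_WRITE -> RQ_NET_QUEUED completion_ref = 4
 *	__drbd_make_request() puts its ref   completion_ref = 3
 *	sender clears RQ_NET_QUEUED          completion_ref = 2
 *	local bio completes (COMPLETED_OK)   completion_ref = 1
 *	P_WRITE_ACK clears RQ_NET_PENDING    completion_ref = 0
 *		-> drbd_req_complete() signals the master bio.
 */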

static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_next == NULL)
		connection->req_next = req;
}

static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_next != req)
		return;
	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
		const unsigned s = req->rq_state;
		if (s & RQ_NET_QUEUED)
			break;
	}
	if (&req->tl_requests == &connection->transfer_log)
		req = NULL;
	connection->req_next = req;
}

static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_ack_pending == NULL)
		connection->req_ack_pending = req;
}

static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_ack_pending != req)
		return;
	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
		const unsigned s = req->rq_state;
		if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
			break;
	}
	if (&req->tl_requests == &connection->transfer_log)
		req = NULL;
	connection->req_ack_pending = req;
}

static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_not_net_done == NULL)
		connection->req_not_net_done = req;
}

static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	if (!connection)
		return;
	if (connection->req_not_net_done != req)
		return;
	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
		const unsigned s = req->rq_state;
		if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
			break;
	}
	if (&req->tl_requests == &connection->transfer_log)
		req = NULL;
	connection->req_not_net_done = req;
}
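
/* The three helper pairs above maintain cached cursors into the
 * transfer log so the hot paths need not rescan it:
 *
 *	req_next          - oldest request still RQ_NET_QUEUED
 *	                    (the sender's next work item)
 *	req_ack_pending   - oldest request sent but not yet acked
 *	req_not_net_done  - oldest request not yet RQ_NET_DONE
 *
 * set_if_null_* claims an empty cursor; advance_conn_* walks forward
 * from the finished request to the next one that still matches the
 * cursor's predicate, or NULL at the end of the list.
 */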

/* I'd like this to be the only place that manipulates
 * req->completion_ref and req->kref. */
static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
		int clear, int set)
{
	struct drbd_device *device = req->device;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	unsigned s = req->rq_state;
	int c_put = 0;

	if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP))
		set |= RQ_COMPLETION_SUSP;

	/* apply */

	req->rq_state &= ~clear;
	req->rq_state |= set;

	/* no change? */
	if (req->rq_state == s)
		return;

	/* intent: get references */

	kref_get(&req->kref);

	if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
		atomic_inc(&req->completion_ref);

	if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
		inc_ap_pending(device);
		atomic_inc(&req->completion_ref);
	}

	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
		atomic_inc(&req->completion_ref);
		set_if_null_req_next(peer_device, req);
	}

	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
		kref_get(&req->kref); /* wait for the DONE */

	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
		/* potentially already completed in the ack_receiver thread */
		if (!(s & RQ_NET_DONE)) {
			atomic_add(req->i.size >> 9, &device->ap_in_flight);
			set_if_null_req_not_net_done(peer_device, req);
		}
		if (req->rq_state & RQ_NET_PENDING)
			set_if_null_req_ack_pending(peer_device, req);
	}

	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
		atomic_inc(&req->completion_ref);

	/* progress: put references */

	if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
		++c_put;

	if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
		D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
		++c_put;
	}

	if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
		if (req->rq_state & RQ_LOCAL_ABORTED)
			kref_put(&req->kref, drbd_req_destroy);
		else
			++c_put;
		list_del_init(&req->req_pending_local);
	}

	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
		dec_ap_pending(device);
		++c_put;
		req->acked_jif = jiffies;
		advance_conn_req_ack_pending(peer_device, req);
	}

	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
		++c_put;
		advance_conn_req_next(peer_device, req);
	}

	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
		if (s & RQ_NET_SENT)
			atomic_sub(req->i.size >> 9, &device->ap_in_flight);
		if (s & RQ_EXP_BARR_ACK)
			kref_put(&req->kref, drbd_req_destroy);
		req->net_done_jif = jiffies;

		/* in ahead/behind mode, or just in case,
		 * before we finally destroy this request,
		 * the caching pointers must not reference it anymore */
		advance_conn_req_next(peer_device, req);
		advance_conn_req_ack_pending(peer_device, req);
		advance_conn_req_not_net_done(peer_device, req);
	}

	/* potentially complete and destroy */

	/* If we made progress, retry conflicting peer requests, if any. */
	if (req->i.waiting)
		wake_up(&device->misc_wait);

	drbd_req_put_completion_ref(req, m, c_put);
	kref_put(&req->kref, drbd_req_destroy);
}
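
/* Worked example of mod_rq_state(): COMPLETED_OK maps to
 * mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED|RQ_LOCAL_OK).
 * The kref_get() at the top keeps the request alive across this
 * function even if the c_put below drops the last completion_ref;
 * clearing RQ_LOCAL_PENDING yields c_put = 1, so
 * drbd_req_put_completion_ref() may complete the master bio, and the
 * final kref_put() releases the guard reference (and possibly the
 * request itself).
 */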

static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
{
	char b[BDEVNAME_SIZE];

	if (!__ratelimit(&drbd_ratelimit_state))
		return;

	drbd_warn(device, "local %s IO error sector %llu+%u on %s\n",
			(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
			(unsigned long long)req->i.sector,
			req->i.size >> 9,
			bdevname(device->ldev->backing_bdev, b));
}

/* Helper for HANDED_OVER_TO_NETWORK.
 * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
 * Is it also still "PENDING"?
 * --> If so, clear PENDING and set NET_OK below.
 * If it is a protocol A write, but no longer RQ_NET_PENDING, a neg-ack
 * was faster (and we must not set RQ_NET_OK) */
static inline bool is_pending_write_protocol_A(struct drbd_request *req)
{
	return (req->rq_state &
		   (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
		== (RQ_WRITE|RQ_NET_PENDING);
}
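
/* Wire-protocol expectations encoded in rq_state, for reference:
 *
 *	protocol A: neither RQ_EXP_WRITE_ACK nor RQ_EXP_RECEIVE_ACK
 *	            (considered "ok" once handed to the network)
 *	protocol B: RQ_EXP_RECEIVE_ACK (P_RECV_ACK expected)
 *	protocol C: RQ_EXP_WRITE_ACK   (P_WRITE_ACK expected)
 *
 * These bits are assigned in the TO_BE_SENT case below based on
 * net_conf->wire_protocol.
 */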

/* obviously this could be coded as many single functions
 * instead of one huge switch,
 * or by putting the code directly in the respective locations
 * (as it has been before).
 *
 * but having it this way
 *  enforces that it is all in this one place, where it is easier to audit,
 *  it makes it obvious that whatever "event" "happens" to a request should
 *  happen "atomically" within the req_lock,
 *  and it enforces that we have to think in a very structured manner
 *  about the "events" that may happen to a request during its life time ...
 */
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m)
{
	struct drbd_device *const device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	struct net_conf *nc;
	int p, rv = 0;

	if (m)
		m->bio = NULL;

	switch (what) {
	default:
		drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__, __LINE__);
		break;

	/* does not happen...
	 * initialization done in drbd_req_new
	case CREATED:
		break;
		*/

	case TO_BE_SENT: /* via network */
		/* reached via __drbd_make_request
		 * and from w_read_retry_remote */
		D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		p = nc->wire_protocol;
		rcu_read_unlock();
		req->rq_state |=
			p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
			p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
		mod_rq_state(req, m, 0, RQ_NET_PENDING);
		break;

	case TO_BE_SUBMITTED: /* locally */
		/* reached via __drbd_make_request */
		D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK));
		mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
		break;

	case COMPLETED_OK:
		if (req->rq_state & RQ_WRITE)
			device->writ_cnt += req->i.size >> 9;
		else
			device->read_cnt += req->i.size >> 9;

		mod_rq_state(req, m, RQ_LOCAL_PENDING,
				RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
		break;

	case ABORT_DISK_IO:
		mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
		break;

	case WRITE_COMPLETED_WITH_ERROR:
		drbd_report_io_error(device, req);
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case READ_COMPLETED_WITH_ERROR:
		drbd_set_out_of_sync(device, req->i.sector, req->i.size);
		drbd_report_io_error(device, req);
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
		fallthrough;
	case READ_AHEAD_COMPLETED_WITH_ERROR:
		/* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case DISCARD_COMPLETED_NOTSUPP:
	case DISCARD_COMPLETED_WITH_ERROR:
		/* I'd rather not detach from local disk just because it
		 * failed a REQ_OP_DISCARD. */
		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
		break;

	case QUEUE_FOR_NET_READ:
		/* READ, and
		 * no local disk,
		 * or target area marked as invalid,
		 * or just got an io-error. */
		/* from __drbd_make_request
		 * or from bio_endio during read io-error recovery */

		/* So we can verify the handle in the answer packet.
		 * Corresponding drbd_remove_request_interval is in
		 * drbd_req_complete() */
		D_ASSERT(device, drbd_interval_empty(&req->i));
		drbd_insert_interval(&device->read_requests, &req->i);

		set_bit(UNPLUG_REMOTE, &device->flags);

		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
		req->w.cb = w_send_read_req;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case QUEUE_FOR_NET_WRITE:
		/* assert something? */
		/* from __drbd_make_request only */

		/* Corresponding drbd_remove_request_interval is in
		 * drbd_req_complete() */
		D_ASSERT(device, drbd_interval_empty(&req->i));
		drbd_insert_interval(&device->write_requests, &req->i);

		/* NOTE
		 * In case the req ended up on the transfer log before being
		 * queued on the worker, it could lead to this request being
		 * missed during cleanup after connection loss.
		 * So we have to do both operations here,
		 * within the same lock that protects the transfer log.
		 *
		 * _req_add_to_epoch(req); this has to be after the
		 * _maybe_start_new_epoch(req); which happened in
		 * __drbd_make_request, because we now may set the bit
		 * again ourselves to close the current epoch.
		 *
		 * Add req to the (now) current epoch (barrier). */

		/* otherwise we may lose an unplug, which may cause some remote
		 * io-scheduler timeout to expire, increasing maximum latency,
		 * hurting performance. */
		set_bit(UNPLUG_REMOTE, &device->flags);

		/* queue work item to send data */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
		req->w.cb = w_send_dblock;
		drbd_queue_work(&connection->sender_work,
				&req->w);

		/* close the epoch, in case it outgrew the limit */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		p = nc->max_epoch_size;
		rcu_read_unlock();
		if (connection->current_tle_writes >= p)
			start_new_tl_epoch(connection);

		break;

	case QUEUE_FOR_SEND_OOS:
		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
		req->w.cb = w_send_out_of_sync;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case READ_RETRY_REMOTE_CANCELED:
	case SEND_CANCELED:
	case SEND_FAILED:
		/* real cleanup will be done from tl_clear.  just update flags
		 * so it is no longer marked as on the worker queue */
		mod_rq_state(req, m, RQ_NET_QUEUED, 0);
		break;

	case HANDED_OVER_TO_NETWORK:
		/* assert something? */
		if (is_pending_write_protocol_A(req))
			/* this is what is dangerous about protocol A:
			 * pretend it was successfully written on the peer. */
			mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
						RQ_NET_SENT|RQ_NET_OK);
		else
			mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
		/* It is still not yet RQ_NET_DONE until the
		 * corresponding epoch barrier got acked as well,
		 * so we know what to dirty on connection loss. */
		break;

	case OOS_HANDED_TO_NETWORK:
		/* Was not set PENDING, no longer QUEUED, so is now DONE
		 * as far as this connection is concerned. */
		mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
		break;

	case CONNECTION_LOST_WHILE_PENDING:
		/* transfer log cleanup after connection loss */
		mod_rq_state(req, m,
				RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP,
				RQ_NET_DONE);
		break;

	case CONFLICT_RESOLVED:
		/* for superseded conflicting writes of multiple primaries,
		 * there is no need to keep anything in the tl, potential
		 * node crashes are covered by the activity log.
		 *
		 * If this request had been marked as RQ_POSTPONED before,
		 * it will actually not be completed, but "restarted",
		 * resubmitted from the retry worker context. */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK);
		break;

	case WRITE_ACKED_BY_PEER_AND_SIS:
		req->rq_state |= RQ_NET_SIS;
		fallthrough;
	case WRITE_ACKED_BY_PEER:
		/* Normal operation protocol C: successfully written on peer.
		 * During resync, even in protocol != C,
		 * we requested an explicit write ack anyway.
		 * Which means we cannot even assert anything here.
		 * Nothing more to do here.
		 * We want to keep the tl in place for all protocols, to cater
		 * for volatile write-back caches on lower level devices. */
		goto ack_common;
	case RECV_ACKED_BY_PEER:
		D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
		/* protocol B; pretends to be successfully written on peer.
		 * see also notes above in HANDED_OVER_TO_NETWORK about
		 * protocol != C */
	ack_common:
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
		break;

	case POSTPONE_WRITE:
		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
		/* If this node has already detected the write conflict, the
		 * worker will be waiting on misc_wait.  Wake it up once this
		 * request has completed locally.
		 */
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_POSTPONED;
		if (req->i.waiting)
			wake_up(&device->misc_wait);
		/* Do not clear RQ_NET_PENDING. This request will make further
		 * progress via restart_conflicting_writes() or
		 * fail_postponed_requests(). Hopefully. */
		break;

	case NEG_ACKED:
		mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0);
		break;

	case FAIL_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;
		mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
		break;

	case RESTART_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		mod_rq_state(req, m,
				RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED,
				RQ_LOCAL_PENDING);

		rv = MR_READ;
		if (bio_data_dir(req->master_bio) == WRITE)
			rv = MR_WRITE;

		get_ldev(device); /* always succeeds in this call path */
		req->w.cb = w_restart_disk_io;
		drbd_queue_work(&connection->sender_work,
				&req->w);
		break;

	case RESEND:
		/* Simply complete (local only) READs. */
		if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
			mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
			break;
		}

		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
		   before the connection loss (B&C only); only P_BARRIER_ACK
		   (or the local completion?) was missing when we suspended.
		   Throwing them out of the TL here by pretending we got a BARRIER_ACK.
		   During connection handshake, we ensure that the peer was not rebooted. */
		if (!(req->rq_state & RQ_NET_OK)) {
			/* FIXME could this possibly be a req->dw.cb == w_send_out_of_sync?
			 * in that case we must not set RQ_NET_PENDING. */

			mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
			if (req->w.cb) {
				/* w.cb expected to be w_send_dblock, or w_send_read_req */
				drbd_queue_work(&connection->sender_work,
						&req->w);
				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
			} /* else: FIXME can this happen? */
			break;
		}
		fallthrough;	/* to BARRIER_ACKED */

	case BARRIER_ACKED:
		/* barrier ack for READ requests does not make sense */
		if (!(req->rq_state & RQ_WRITE))
			break;

		if (req->rq_state & RQ_NET_PENDING) {
			/* barrier came in before all requests were acked.
			 * this is bad, because if the connection is lost now,
			 * we won't be able to clean them up... */
			drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n");
		}
		/* Allowed to complete requests, even while suspended.
		 * As this is called for all requests within a matching epoch,
		 * we need to filter, and only set RQ_NET_DONE for those that
		 * have actually been on the wire. */
		mod_rq_state(req, m, RQ_COMPLETION_SUSP,
				(req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
		break;

	case DATA_RECEIVED:
		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
		break;

	case QUEUE_AS_DRBD_BARRIER:
		start_new_tl_epoch(connection);
		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
		break;
	}

	return rv;
}

/* we may do a local read if:
 * - we are consistent (of course),
 * - or we are generally inconsistent,
 *   BUT we are still/already IN SYNC for this area.
 *   since size may be bigger than BM_BLOCK_SIZE,
 *   we may need to check several bits.
 */
static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size)
{
	unsigned long sbnr, ebnr;
	sector_t esector, nr_sectors;

	if (device->state.disk == D_UP_TO_DATE)
		return true;
	if (device->state.disk != D_INCONSISTENT)
		return false;
	esector = sector + (size >> 9) - 1;
	nr_sectors = get_capacity(device->vdisk);
	D_ASSERT(device, sector < nr_sectors);
	D_ASSERT(device, esector < nr_sectors);

	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	return drbd_bm_count_bits(device, sbnr, ebnr) == 0;
}
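
/* Bitmap granularity, by example: with the usual BM_BLOCK_SIZE of 4 KiB
 * (8 sectors per bitmap bit), a 16 KiB read at sector 8 spans sectors
 * 8..39, i.e. bitmap bits 1..4; the read may be served locally only if
 * drbd_bm_count_bits() finds none of those bits set (no block in the
 * range is out of sync).
 */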

static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
		enum drbd_read_balancing rbm)
{
	struct backing_dev_info *bdi;
	int stripe_shift;

	switch (rbm) {
	case RB_CONGESTED_REMOTE:
		bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
		return bdi_read_congested(bdi);
	case RB_LEAST_PENDING:
		return atomic_read(&device->local_cnt) >
			atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
	case RB_32K_STRIPING:  /* stripe_shift = 15 */
	case RB_64K_STRIPING:
	case RB_128K_STRIPING:
	case RB_256K_STRIPING:
	case RB_512K_STRIPING:
	case RB_1M_STRIPING:   /* stripe_shift = 20 */
		stripe_shift = (rbm - RB_32K_STRIPING + 15);
		return (sector >> (stripe_shift - 9)) & 1;
	case RB_ROUND_ROBIN:
		return test_and_change_bit(READ_BALANCE_RR, &device->flags);
	case RB_PREFER_REMOTE:
		return true;
	case RB_PREFER_LOCAL:
	default:
		return false;
	}
}
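
/* Striping arithmetic, worked through for RB_64K_STRIPING:
 * stripe_shift = 16, so the test is (sector >> 7) & 1 - reads alternate
 * between local and remote every 128 sectors (64 KiB).  Sectors 0..127
 * stay local, 128..255 go remote, and so on; a cheap way to split
 * sequential read bandwidth across both nodes.
 */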

/*
 * complete_conflicting_writes  -  wait for any conflicting write requests
 *
 * The write_requests tree contains all active write requests which we
 * currently know about.  Wait for any requests to complete which conflict with
 * the new one.
 *
 * Only way out: remove the conflicting intervals from the tree.
 */
static void complete_conflicting_writes(struct drbd_request *req)
{
	DEFINE_WAIT(wait);
	struct drbd_device *device = req->device;
	struct drbd_interval *i;
	sector_t sector = req->i.sector;
	int size = req->i.size;

	for (;;) {
		drbd_for_each_overlap(i, &device->write_requests, sector, size) {
			/* Ignore, if already completed to upper layers. */
			if (i->completed)
				continue;
			/* Handle the first found overlap.  After the schedule
			 * we have to restart the tree walk. */
			break;
		}
		if (!i)	/* if any */
			break;

		/* Indicate to wake up device->misc_wait on progress.  */
		prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
		i->waiting = true;
		spin_unlock_irq(&device->resource->req_lock);
		schedule();
		spin_lock_irq(&device->resource->req_lock);
	}
	finish_wait(&device->misc_wait, &wait);
}
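
/* This is the classic prepare_to_wait()/schedule()/finish_wait()
 * pattern: i->waiting is set while still holding req_lock, so whoever
 * completes the conflicting interval (see the i->waiting checks in
 * drbd_remove_request_interval() and drbd_req_complete()) is guaranteed
 * to issue the wake_up(&device->misc_wait) we sleep on; the lock is
 * dropped before schedule() to avoid sleeping with a spinlock held.
 * Each wakeup restarts the tree walk from scratch, since the tree may
 * have changed while we slept.
 */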

/* called within req_lock */
static void maybe_pull_ahead(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct net_conf *nc;
	bool congested = false;
	enum drbd_on_congestion on_congestion;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
	rcu_read_unlock();
	if (on_congestion == OC_BLOCK ||
	    connection->agreed_pro_version < 96)
		return;

	if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
		return; /* nothing to do ... */

	/* If I don't even have good local storage, we can not reasonably try
	 * to pull ahead of the peer. We also need the local reference to make
	 * sure device->act_log is there.
	 */
	if (!get_ldev_if_state(device, D_UP_TO_DATE))
		return;

	if (nc->cong_fill &&
	    atomic_read(&device->ap_in_flight) >= nc->cong_fill) {
		drbd_info(device, "Congestion-fill threshold reached\n");
		congested = true;
	}

	if (device->act_log->used >= nc->cong_extents) {
		drbd_info(device, "Congestion-extents threshold reached\n");
		congested = true;
	}

	if (congested) {
		/* start a new epoch for non-mirrored writes */
		start_new_tl_epoch(first_peer_device(device)->connection);

		if (on_congestion == OC_PULL_AHEAD)
			_drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL);
		else /* nc->on_congestion == OC_DISCONNECT */
			_drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL);
	}
	put_ldev(device);
}

/* If this returns false, and req->private_bio is still set,
 * this should be submitted locally.
 *
 * If it returns false, but req->private_bio is not set,
 * we do not have access to good data :(
 *
 * Otherwise, this destroys req->private_bio, if any,
 * and returns true.
 */
static bool do_remote_read(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	enum drbd_read_balancing rbm;

	if (req->private_bio) {
		if (!drbd_may_do_local_read(device,
					req->i.sector, req->i.size)) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
	}

	if (device->state.pdsk != D_UP_TO_DATE)
		return false;

	if (req->private_bio == NULL)
		return true;

	/* TODO: improve read balancing decisions, take into account drbd
	 * protocol, pending requests etc. */

	rcu_read_lock();
	rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing;
	rcu_read_unlock();

	if (rbm == RB_PREFER_LOCAL && req->private_bio)
		return false; /* submit locally */

	if (remote_due_to_read_balancing(device, req->i.sector, rbm)) {
		if (req->private_bio) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
		return true;
	}

	return false;
}

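/* Roughly: writes go to the peer if it already has current data, or if
 * it is inconsistent but connected in a resync state where we still have
 * to mirror every write (see the protocol 96 note below). */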
bool drbd_should_do_remote(union drbd_dev_state s)
{
	return s.pdsk == D_UP_TO_DATE ||
		(s.pdsk >= D_INCONSISTENT &&
		 s.conn >= C_WF_BITMAP_T &&
		 s.conn < C_AHEAD);
	/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
	   states. */
}

static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
{
	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
	   since we enter state C_AHEAD only if proto >= 96 */
}

/* returns number of connections (== 1, for drbd 8.4)
 * expected to actually write this data,
 * which does NOT include those that we are L_AHEAD for. */
static int drbd_process_write_request(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	int remote, send_oos;

	remote = drbd_should_do_remote(device->state);
	send_oos = drbd_should_send_out_of_sync(device->state);

	/* Need to replicate writes.  Unless it is an empty flush,
	 * which is better mapped to a DRBD P_BARRIER packet,
	 * also for drbd wire protocol compatibility reasons.
	 * If this was a flush, just start a new epoch.
	 * Unless the current epoch was empty anyway, or we are not currently
	 * replicating, in which case there is no point. */
	if (unlikely(req->i.size == 0)) {
		/* The only size==0 bios we expect are empty flushes. */
		D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
		if (remote)
			_req_mod(req, QUEUE_AS_DRBD_BARRIER);
		return remote;
	}

	if (!remote && !send_oos)
		return 0;

	D_ASSERT(device, !(remote && send_oos));

	if (remote) {
		_req_mod(req, TO_BE_SENT);
		_req_mod(req, QUEUE_FOR_NET_WRITE);
	} else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
		_req_mod(req, QUEUE_FOR_SEND_OOS);

	return remote;
}

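/* Map a discard/zero-out request to the backing device, then complete the
 * private bio with the result.  "flags" are the EE_* flags (EE_TRIM,
 * EE_ZEROOUT) passed through to drbd_issue_discard_or_zero_out(). */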
static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
{
	int err = drbd_issue_discard_or_zero_out(req->device,
				req->i.sector, req->i.size >> 9, flags);
	if (err)
		req->private_bio->bi_status = BLK_STS_IOERR;
	bio_endio(req->private_bio);
}

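/* Submit req->private_bio to the local backing device, unless fault
 * injection or a meanwhile lost local disk forces us to complete it with
 * an error right away.  Discards and zero-outs are routed through the
 * helper above instead of submit_bio_noacct(). */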
static void
drbd_submit_req_private_bio(struct drbd_request *req)
{
	struct drbd_device *device = req->device;
	struct bio *bio = req->private_bio;
	unsigned int type;

	if (bio_op(bio) != REQ_OP_READ)
		type = DRBD_FAULT_DT_WR;
	else if (bio->bi_opf & REQ_RAHEAD)
		type = DRBD_FAULT_DT_RA;
	else
		type = DRBD_FAULT_DT_RD;

	bio_set_dev(bio, device->ldev->backing_bdev);

	/* State may have changed since we grabbed our reference on the
	 * ->ldev member. Double check, and short-circuit to endio.
	 * In case the last activity log transaction failed to get on
	 * stable storage, and this is a WRITE, we may not even submit
	 * this bio. */
	if (get_ldev(device)) {
		if (drbd_insert_fault(device, type))
			bio_io_error(bio);
		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
			drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
			    ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
		else if (bio_op(bio) == REQ_OP_DISCARD)
			drbd_process_discard_or_zeroes_req(req, EE_TRIM);
		else
			submit_bio_noacct(bio);
		put_ldev(device);
	} else
		bio_io_error(bio);
}

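/* Hand a write over to the submitter thread; do_submit() will take care
 * of the activity log and then send and submit it. */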
static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
{
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&req->tl_requests, &device->submit.writes);
	list_add_tail(&req->req_pending_master_completion,
			&device->pending_master_completion[1 /* WRITE */]);
	spin_unlock_irq(&device->resource->req_lock);
	queue_work(device->submit.wq, &device->submit.worker);
	/* do_submit() may sleep internally on al_wait, too */
	wake_up(&device->al_wait);
}

/* returns the new drbd_request pointer, if the caller is expected to
 * drbd_send_and_submit() it (to save latency), or NULL if we queued the
 * request on the submitter thread.
 * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
 */
static struct drbd_request *
drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
	const int rw = bio_data_dir(bio);
	struct drbd_request *req;

	/* allocate outside of all locks */
	req = drbd_req_new(device, bio);
	if (!req) {
		dec_ap_bio(device);
		/* only pass the error to the upper layers.
		 * if user cannot handle io errors, that's not our business. */
		drbd_err(device, "could not kmalloc() req\n");
		bio->bi_status = BLK_STS_RESOURCE;
		bio_endio(bio);
		return ERR_PTR(-ENOMEM);
	}

	/* Update disk stats */
	req->start_jif = bio_start_io_acct(req->master_bio);

	if (!get_ldev(device)) {
		bio_put(req->private_bio);
		req->private_bio = NULL;
	}

	/* process discards always from our submitter thread */
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
	    bio_op(bio) == REQ_OP_DISCARD)
		goto queue_for_submitter_thread;

	if (rw == WRITE && req->private_bio && req->i.size
	&& !test_bit(AL_SUSPENDED, &device->flags)) {
		if (!drbd_al_begin_io_fastpath(device, &req->i))
			goto queue_for_submitter_thread;
		req->rq_state |= RQ_IN_ACT_LOG;
		req->in_actlog_jif = jiffies;
	}
	return req;

 queue_for_submitter_thread:
	atomic_inc(&device->ap_actlog_cnt);
	drbd_queue_write(device, req);
	return NULL;
}

/* Require at least one path to current data.
 * We don't want to allow writes on C_STANDALONE D_INCONSISTENT:
 * We would not allow reading back what was written,
 * we would not have bumped the data generation uuids,
 * we would cause data divergence for all the wrong reasons.
 *
 * If we don't see at least one D_UP_TO_DATE, we will fail this request,
 * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO,
 * and queues for retry later.
 */
static bool may_do_writes(struct drbd_device *device)
{
	const union drbd_dev_state s = device->state;
	return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
}

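/*
 * Plugging: while the block layer holds a plug for the current task, we
 * remember the most recent request per resource in a drbd_plug_cb.  When
 * the plug is released, drbd_unplug() below tags that request RQ_UNPLUG,
 * so the sender follows it up with P_UNPLUG_REMOTE.  Rough usage sketch
 * (as done in drbd_send_and_submit() and the submit paths below):
 *
 *	blk_start_plug(&plug);
 *	plug = drbd_check_plugged(resource);	/- allocate cb on first use
 *	if (plug)
 *		drbd_update_plug(plug, req);	/- remember newest request
 *	blk_finish_plug(&plug);			/- invokes drbd_unplug()
 */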
struct drbd_plug_cb {
	struct blk_plug_cb cb;
	struct drbd_request *most_recent_req;
	/* do we need more? */
};

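/* blk_plug_cb callback, run once when the blk_plug is released. */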
static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
	struct drbd_resource *resource = plug->cb.data;
	struct drbd_request *req = plug->most_recent_req;

	kfree(cb);
	if (!req)
		return;

	spin_lock_irq(&resource->req_lock);
	/* In case the sender did not process it yet, raise the flag to
	 * have it followed with P_UNPLUG_REMOTE just after. */
	req->rq_state |= RQ_UNPLUG;
	/* but also queue a generic unplug */
	drbd_queue_unplug(req->device);
	kref_put(&req->kref, drbd_req_destroy);
	spin_unlock_irq(&resource->req_lock);
}

static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
{
	/* A lot of text to say
	 * return (struct drbd_plug_cb*)blk_check_plugged(); */
	struct drbd_plug_cb *plug;
	struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));

	if (cb)
		plug = container_of(cb, struct drbd_plug_cb, cb);
	else
		plug = NULL;
	return plug;
}

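/* Remember this request as the most recent one issued under the current
 * plug; it holds an extra kref until drbd_unplug() drops it. */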
static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
{
	struct drbd_request *tmp = plug->most_recent_req;
	/* Will be sent to some peer.
	 * Remember to tag it with UNPLUG_REMOTE on unplug */
	kref_get(&req->kref);
	plug->most_recent_req = req;
	if (tmp)
		kref_put(&tmp->kref, drbd_req_destroy);
}

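/* Central decision point for a prepared request: under req_lock, decide
 * whether it goes to the peer, to the local disk, to both, or can only
 * fail with "no data"; then, outside the lock, submit the private bio,
 * if any. */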
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
	struct drbd_resource *resource = device->resource;
	const int rw = bio_data_dir(req->master_bio);
	struct bio_and_error m = { NULL, };
	bool no_remote = false;
	bool submit_private_bio = false;

	spin_lock_irq(&resource->req_lock);
	if (rw == WRITE) {
		/* This may temporarily give up the req_lock,
		 * but will re-acquire it before it returns here.
		 * Needs to be before the check on drbd_suspended() */
		complete_conflicting_writes(req);
		/* no more giving up req_lock from now on! */

		/* check for congestion, and potentially stop sending
		 * full data updates, but start sending "dirty bits" only. */
		maybe_pull_ahead(device);
	}

	if (drbd_suspended(device)) {
		/* push back and retry: */
		req->rq_state |= RQ_POSTPONED;
		if (req->private_bio) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
		}
		goto out;
	}

	/* We fail READ early, if we cannot serve it.
	 * We must do this before req is registered on any lists.
	 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
	if (rw != WRITE) {
		if (!do_remote_read(req) && !req->private_bio)
			goto nodata;
	}

	/* which transfer log epoch does this belong to? */
	req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr);

	/* no point in adding empty flushes to the transfer log,
	 * they are mapped to drbd barriers already. */
	if (likely(req->i.size != 0)) {
		if (rw == WRITE)
			first_peer_device(device)->connection->current_tle_writes++;

		list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log);
	}

	if (rw == WRITE) {
		if (req->private_bio && !may_do_writes(device)) {
			bio_put(req->private_bio);
			req->private_bio = NULL;
			put_ldev(device);
			goto nodata;
		}
		if (!drbd_process_write_request(req))
			no_remote = true;
	} else {
		/* We either have a private_bio, or we can read from remote.
		 * Otherwise we had done the goto nodata above. */
		if (req->private_bio == NULL) {
			_req_mod(req, TO_BE_SENT);
			_req_mod(req, QUEUE_FOR_NET_READ);
		} else
			no_remote = true;
	}

	if (no_remote == false) {
		struct drbd_plug_cb *plug = drbd_check_plugged(resource);
		if (plug)
			drbd_update_plug(plug, req);
	}

	/* If it took the fast path in drbd_request_prepare, add it here.
	 * The slow path has added it already. */
	if (list_empty(&req->req_pending_master_completion))
		list_add_tail(&req->req_pending_master_completion,
			&device->pending_master_completion[rw == WRITE]);
	if (req->private_bio) {
		/* needs to be marked within the same spinlock */
		req->pre_submit_jif = jiffies;
		list_add_tail(&req->req_pending_local,
			&device->pending_completion[rw == WRITE]);
		_req_mod(req, TO_BE_SUBMITTED);
		/* but we need to give up the spinlock to submit */
		submit_private_bio = true;
	} else if (no_remote) {
nodata:
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
					(unsigned long long)req->i.sector, req->i.size >> 9);
		/* A write may have been queued for send_oos, however.
		 * So we cannot simply free it, we must go through drbd_req_put_completion_ref() */
	}

out:
	drbd_req_put_completion_ref(req, &m, 1);
	spin_unlock_irq(&resource->req_lock);

	/* Even though the above is a kref_put(), this is safe.
	 * As long as we still need to submit our private bio,
	 * we hold a completion ref, and the request cannot disappear.
	 * If however this request did not even have a private bio to submit
	 * (e.g. remote read), req may already be invalid now.
	 * That's why we cannot check on req->private_bio. */
	if (submit_private_bio)
		drbd_submit_req_private_bio(req);
	if (m.bio)
		complete_master_bio(device, &m);
}

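/* Fast path: if drbd_request_prepare() neither failed the allocation nor
 * queued the request on the submitter thread, send and submit it
 * directly from the caller's context to save latency. */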
void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
	struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);
	if (IS_ERR_OR_NULL(req))
		return;
	drbd_send_and_submit(device, req);
}

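/* Submit everything on "incoming" that does not have to wait for an
 * activity log transaction; writes whose AL extents are not "hot" stay
 * on the list for do_submit() to deal with. */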
static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
{
	struct blk_plug plug;
	struct drbd_request *req, *tmp;

	blk_start_plug(&plug);
	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
		const int rw = bio_data_dir(req->master_bio);

		if (rw == WRITE /* rw != WRITE should not even end up here! */
		&& req->private_bio && req->i.size
		&& !test_bit(AL_SUSPENDED, &device->flags)) {
			if (!drbd_al_begin_io_fastpath(device, &req->i))
				continue;

			req->rq_state |= RQ_IN_ACT_LOG;
			req->in_actlog_jif = jiffies;
			atomic_dec(&device->ap_actlog_cnt);
		}

		list_del_init(&req->tl_requests);
		drbd_send_and_submit(device, req);
	}
	blk_finish_plug(&plug);
}

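/* Try to fit the AL updates for all "incoming" requests into the pending
 * activity log transaction, without blocking: requests that fit move to
 * "pending", requests blocked by resync move to "later", and we stop
 * early once the transaction is full (-ENOBUFS).  Returns true if
 * anything was moved to "pending". */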
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
					    struct list_head *incoming,
					    struct list_head *pending,
					    struct list_head *later)
{
	struct drbd_request *req;
	int wake = 0;
	int err;

	spin_lock_irq(&device->al_lock);
	while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
		err = drbd_al_begin_io_nonblock(device, &req->i);
		if (err == -ENOBUFS)
			break;
		if (err == -EBUSY)
			wake = 1;
		if (err)
			list_move_tail(&req->tl_requests, later);
		else
			list_move_tail(&req->tl_requests, pending);
	}
	spin_unlock_irq(&device->al_lock);
	if (wake)
		wake_up(&device->al_wait);
	return !list_empty(pending);
}

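/* The activity log transaction covering these requests has been
 * committed; mark them accordingly and send/submit them. */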
static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
{
	struct blk_plug plug;
	struct drbd_request *req;

	blk_start_plug(&plug);
	while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
		req->rq_state |= RQ_IN_ACT_LOG;
		req->in_actlog_jif = jiffies;
		atomic_dec(&device->ap_actlog_cnt);
		list_del_init(&req->tl_requests);
		drbd_send_and_submit(device, req);
	}
	blk_finish_plug(&plug);
}

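/* Submitter thread work function.  Batches requests into as few activity
 * log transactions as possible: what is "hot" is submitted right away,
 * AL updates for the rest are collected non-blockingly, committed in one
 * transaction, and the cycle repeats until the queue is drained. */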
void do_submit(struct work_struct *ws)
{
	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
	LIST_HEAD(incoming);	/* from drbd_make_request() */
	LIST_HEAD(pending);	/* to be submitted after next AL-transaction commit */
	LIST_HEAD(busy);	/* blocked by resync requests */

	/* grab new incoming requests */
	spin_lock_irq(&device->resource->req_lock);
	list_splice_tail_init(&device->submit.writes, &incoming);
	spin_unlock_irq(&device->resource->req_lock);

	for (;;) {
		DEFINE_WAIT(wait);

		/* move used-to-be-busy back to front of incoming */
		list_splice_init(&busy, &incoming);
		submit_fast_path(device, &incoming);
		if (list_empty(&incoming))
			break;

		for (;;) {
			prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);

			list_splice_init(&busy, &incoming);
			prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
			if (!list_empty(&pending))
				break;

			schedule();

			/* If all currently "hot" activity log extents are kept busy by
			 * incoming requests, we still must not totally starve new
			 * requests to "cold" extents.
			 * Something left on &incoming means there had not been
			 * enough update slots available, and the activity log
			 * has been marked as "starving".
			 *
			 * Try again now, without looking for new requests,
			 * effectively blocking all new requests until we made
			 * at least _some_ progress with what we currently have.
			 */
			if (!list_empty(&incoming))
				continue;

			/* Nothing moved to pending, but nothing left
			 * on incoming: all moved to busy!
			 * Grab new and iterate. */
			spin_lock_irq(&device->resource->req_lock);
			list_splice_tail_init(&device->submit.writes, &incoming);
			spin_unlock_irq(&device->resource->req_lock);
		}
		finish_wait(&device->al_wait, &wait);

		/* If the transaction was full, before all incoming requests
		 * had been processed, skip ahead to commit, and iterate
		 * without splicing in more incoming requests from upper layers.
		 *
		 * Else, if all incoming have been processed,
		 * they have become either "pending" (to be submitted after
		 * next transaction commit) or "busy" (blocked by resync).
		 *
		 * Maybe more was queued, while we prepared the transaction?
		 * Try to stuff those into this transaction as well.
		 * Be strictly non-blocking here,
		 * we already have something to commit.
		 *
		 * Commit if we don't make any more progress.
		 */

		while (list_empty(&incoming)) {
			LIST_HEAD(more_pending);
			LIST_HEAD(more_incoming);
			bool made_progress;

			/* It is ok to look outside the lock,
			 * it's only an optimization anyway */
			if (list_empty(&device->submit.writes))
				break;

			spin_lock_irq(&device->resource->req_lock);
			list_splice_tail_init(&device->submit.writes, &more_incoming);
			spin_unlock_irq(&device->resource->req_lock);

			if (list_empty(&more_incoming))
				break;

			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);

			list_splice_tail_init(&more_pending, &pending);
			list_splice_tail_init(&more_incoming, &incoming);
			if (!made_progress)
				break;
		}

		drbd_al_begin_io_commit(device);
		send_and_submit_pending(device, &pending);
	}
}

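/* Entry point from the block layer for every bio submitted to a drbd
 * device. */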
blk_qc_t drbd_submit_bio(struct bio *bio)
{
	struct drbd_device *device = bio->bi_disk->private_data;
	unsigned long start_jif;

	blk_queue_split(&bio);

	start_jif = jiffies;

	/*
	 * what we "blindly" assume:
	 */
	D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));

	inc_ap_bio(device);
	__drbd_make_request(device, bio, start_jif);
	return BLK_QC_T_NONE;
}

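/* Check whether the oldest request waiting for the peer has exceeded the
 * effective network timeout (ko-count * timeout), taking care not to
 * blame the peer right after a reconnect, nor for a barrier we have not
 * even sent yet. */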
static bool net_timeout_reached(struct drbd_request *net_req,
		struct drbd_connection *connection,
		unsigned long now, unsigned long ent,
		unsigned int ko_count, unsigned int timeout)
{
	struct drbd_device *device = net_req->device;

	if (!time_after(now, net_req->pre_send_jif + ent))
		return false;

	if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
		return false;

	if (net_req->rq_state & RQ_NET_PENDING) {
		drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
			jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
		return true;
	}

	/* We received an ACK already (or are using protocol A),
	 * but are waiting for the epoch closing barrier ack.
	 * Check if we sent the barrier already.  We should not blame the peer
	 * for being unresponsive, if we did not even ask it yet. */
	if (net_req->epoch == connection->send.current_epoch_nr) {
		drbd_warn(device,
			"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
			jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
		return false;
	}

	/* Worst case: we may have been blocked for whatever reason, then
	 * suddenly are able to send a lot of requests (and epoch separating
	 * barriers) in quick succession.
	 * The timestamp of the net_req may be much too old and not correspond
	 * to the sending time of the relevant unack'ed barrier packet, so
	 * would trigger a spurious timeout.  The latest barrier packet may
	 * have a too recent timestamp to trigger the timeout, so we could
	 * potentially miss a timeout.  Right now we don't have a place to
	 * conveniently store these timestamps.
	 * But in this particular situation, the application requests are still
	 * completed to upper layers, DRBD should still "feel" responsive.
	 * No need yet to kill this connection, it may still recover.
	 * If not, eventually we will have queued enough into the network for
	 * us to block. From that point of view, the timestamp of the last sent
	 * barrier packet is relevant enough.
	 */
	if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
		drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
			connection->send.last_sent_barrier_jif, now,
			jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
		return true;
	}
	return false;
}

/* A request is considered timed out, if
 * - we have some effective timeout from the configuration,
 *   with some state restrictions applied,
 * - the oldest request is waiting for a response from the network
 *   resp. the local disk,
 * - the oldest request is in fact older than the effective timeout,
 * - the connection was established (resp. disk was attached)
 *   for longer than the timeout already.
 * Note that for 32bit jiffies and very stable connections/disks,
 * we may have a wrap around, which is caught by
 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
 *
 * Side effect: once per 32bit wrap-around interval, which means every
 * ~198 days with 250 HZ, we have a window where the timeout would need
 * to expire twice (worst case) to become effective. Good enough.
 */

void request_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, request_timer);
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
	struct net_conf *nc;
	unsigned long oldest_submit_jif;
	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
	unsigned long now;
	unsigned int ko_count = 0, timeout = 0;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
		ko_count = nc->ko_count;
		timeout = nc->timeout;
	}

	if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
		dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
		put_ldev(device);
	}
	rcu_read_unlock();

	ent = timeout * HZ/10 * ko_count;
	et = min_not_zero(dt, ent);

	if (!et)
		return; /* Recurring timer stopped */

	now = jiffies;
	nt = now + et;

	spin_lock_irq(&device->resource->req_lock);
	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);

	/* maybe the oldest request waiting for the peer is in fact still
	 * blocking in tcp sendmsg.  That's ok, though, that's handled via the
	 * socket send timeout, requesting a ping, and bumping ko-count in
	 * we_should_drop_the_connection().
	 */

	/* check the oldest request we successfully sent,
	 * but which is still waiting for an ACK. */
	req_peer = connection->req_ack_pending;

	/* if we don't have such a request (e.g. protocol A)
	 * check the oldest request which is still waiting on its epoch
	 * closing barrier ack. */
	if (!req_peer)
		req_peer = connection->req_not_net_done;

	/* evaluate the oldest peer request only in one timer! */
	if (req_peer && req_peer->device != device)
		req_peer = NULL;

	/* do we have something to evaluate? */
	if (req_peer == NULL && req_write == NULL && req_read == NULL)
		goto out;

	oldest_submit_jif =
		(req_write && req_read)
		? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
		  ? req_write->pre_submit_jif : req_read->pre_submit_jif )
		: req_write ? req_write->pre_submit_jif
		: req_read ? req_read->pre_submit_jif : now;

	if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
		_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);

	if (dt && oldest_submit_jif != now &&
		 time_after(now, oldest_submit_jif + dt) &&
		!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
	}

	/* Reschedule timer for the nearest not already expired timeout.
	 * Fallback to now + min(effective network timeout, disk timeout). */
	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
		? req_peer->pre_send_jif + ent : now + et;
	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
		? oldest_submit_jif + dt : now + et;
	nt = time_before(ent, dt) ? ent : dt;
out:
	spin_unlock_irq(&device->resource->req_lock);
	mod_timer(&device->request_timer, nt);
}