162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci drbd_req.h 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci Copyright (C) 2006-2008, LINBIT Information Technologies GmbH. 862306a36Sopenharmony_ci Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 962306a36Sopenharmony_ci Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>. 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#ifndef _DRBD_REQ_H 1462306a36Sopenharmony_ci#define _DRBD_REQ_H 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/module.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <linux/slab.h> 1962306a36Sopenharmony_ci#include <linux/drbd.h> 2062306a36Sopenharmony_ci#include "drbd_int.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci/* The request callbacks will be called in irq context by the IDE drivers, 2362306a36Sopenharmony_ci and in Softirqs/Tasklets/BH context by the SCSI drivers, 2462306a36Sopenharmony_ci and by the receiver and worker in kernel-thread context. 2562306a36Sopenharmony_ci Try to get the locking right :) */ 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are 2962306a36Sopenharmony_ci * associated with IO requests originating from the block layer above us. 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * There are quite a few things that may happen to a drbd request 3262306a36Sopenharmony_ci * during its lifetime. 3362306a36Sopenharmony_ci * 3462306a36Sopenharmony_ci * It will be created. 3562306a36Sopenharmony_ci * It will be marked with the intention to be 3662306a36Sopenharmony_ci * submitted to local disk and/or 3762306a36Sopenharmony_ci * send via the network. 3862306a36Sopenharmony_ci * 3962306a36Sopenharmony_ci * It has to be placed on the transfer log and other housekeeping lists, 4062306a36Sopenharmony_ci * In case we have a network connection. 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * It may be identified as a concurrent (write) request 4362306a36Sopenharmony_ci * and be handled accordingly. 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * It may me handed over to the local disk subsystem. 4662306a36Sopenharmony_ci * It may be completed by the local disk subsystem, 4762306a36Sopenharmony_ci * either successfully or with io-error. 4862306a36Sopenharmony_ci * In case it is a READ request, and it failed locally, 4962306a36Sopenharmony_ci * it may be retried remotely. 5062306a36Sopenharmony_ci * 5162306a36Sopenharmony_ci * It may be queued for sending. 5262306a36Sopenharmony_ci * It may be handed over to the network stack, 5362306a36Sopenharmony_ci * which may fail. 5462306a36Sopenharmony_ci * It may be acknowledged by the "peer" according to the wire_protocol in use. 5562306a36Sopenharmony_ci * this may be a negative ack. 5662306a36Sopenharmony_ci * It may receive a faked ack when the network connection is lost and the 5762306a36Sopenharmony_ci * transfer log is cleaned up. 5862306a36Sopenharmony_ci * Sending may be canceled due to network connection loss. 5962306a36Sopenharmony_ci * When it finally has outlived its time, 6062306a36Sopenharmony_ci * corresponding dirty bits in the resync-bitmap may be cleared or set, 6162306a36Sopenharmony_ci * it will be destroyed, 6262306a36Sopenharmony_ci * and completion will be signalled to the originator, 6362306a36Sopenharmony_ci * with or without "success". 6462306a36Sopenharmony_ci */ 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cienum drbd_req_event { 6762306a36Sopenharmony_ci CREATED, 6862306a36Sopenharmony_ci TO_BE_SENT, 6962306a36Sopenharmony_ci TO_BE_SUBMITTED, 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci /* XXX yes, now I am inconsistent... 7262306a36Sopenharmony_ci * these are not "events" but "actions" 7362306a36Sopenharmony_ci * oh, well... */ 7462306a36Sopenharmony_ci QUEUE_FOR_NET_WRITE, 7562306a36Sopenharmony_ci QUEUE_FOR_NET_READ, 7662306a36Sopenharmony_ci QUEUE_FOR_SEND_OOS, 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci /* An empty flush is queued as P_BARRIER, 7962306a36Sopenharmony_ci * which will cause it to complete "successfully", 8062306a36Sopenharmony_ci * even if the local disk flush failed. 8162306a36Sopenharmony_ci * 8262306a36Sopenharmony_ci * Just like "real" requests, empty flushes (blkdev_issue_flush()) will 8362306a36Sopenharmony_ci * only see an error if neither local nor remote data is reachable. */ 8462306a36Sopenharmony_ci QUEUE_AS_DRBD_BARRIER, 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci SEND_CANCELED, 8762306a36Sopenharmony_ci SEND_FAILED, 8862306a36Sopenharmony_ci HANDED_OVER_TO_NETWORK, 8962306a36Sopenharmony_ci OOS_HANDED_TO_NETWORK, 9062306a36Sopenharmony_ci CONNECTION_LOST_WHILE_PENDING, 9162306a36Sopenharmony_ci READ_RETRY_REMOTE_CANCELED, 9262306a36Sopenharmony_ci RECV_ACKED_BY_PEER, 9362306a36Sopenharmony_ci WRITE_ACKED_BY_PEER, 9462306a36Sopenharmony_ci WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ 9562306a36Sopenharmony_ci CONFLICT_RESOLVED, 9662306a36Sopenharmony_ci POSTPONE_WRITE, 9762306a36Sopenharmony_ci NEG_ACKED, 9862306a36Sopenharmony_ci BARRIER_ACKED, /* in protocol A and B */ 9962306a36Sopenharmony_ci DATA_RECEIVED, /* (remote read) */ 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci COMPLETED_OK, 10262306a36Sopenharmony_ci READ_COMPLETED_WITH_ERROR, 10362306a36Sopenharmony_ci READ_AHEAD_COMPLETED_WITH_ERROR, 10462306a36Sopenharmony_ci WRITE_COMPLETED_WITH_ERROR, 10562306a36Sopenharmony_ci DISCARD_COMPLETED_NOTSUPP, 10662306a36Sopenharmony_ci DISCARD_COMPLETED_WITH_ERROR, 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci ABORT_DISK_IO, 10962306a36Sopenharmony_ci RESEND, 11062306a36Sopenharmony_ci FAIL_FROZEN_DISK_IO, 11162306a36Sopenharmony_ci RESTART_FROZEN_DISK_IO, 11262306a36Sopenharmony_ci NOTHING, 11362306a36Sopenharmony_ci}; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci/* encoding of request states for now. we don't actually need that many bits. 11662306a36Sopenharmony_ci * we don't need to do atomic bit operations either, since most of the time we 11762306a36Sopenharmony_ci * need to look at the connection state and/or manipulate some lists at the 11862306a36Sopenharmony_ci * same time, so we should hold the request lock anyways. 11962306a36Sopenharmony_ci */ 12062306a36Sopenharmony_cienum drbd_req_state_bits { 12162306a36Sopenharmony_ci /* 3210 12262306a36Sopenharmony_ci * 0000: no local possible 12362306a36Sopenharmony_ci * 0001: to be submitted 12462306a36Sopenharmony_ci * UNUSED, we could map: 011: submitted, completion still pending 12562306a36Sopenharmony_ci * 0110: completed ok 12662306a36Sopenharmony_ci * 0010: completed with error 12762306a36Sopenharmony_ci * 1001: Aborted (before completion) 12862306a36Sopenharmony_ci * 1x10: Aborted and completed -> free 12962306a36Sopenharmony_ci */ 13062306a36Sopenharmony_ci __RQ_LOCAL_PENDING, 13162306a36Sopenharmony_ci __RQ_LOCAL_COMPLETED, 13262306a36Sopenharmony_ci __RQ_LOCAL_OK, 13362306a36Sopenharmony_ci __RQ_LOCAL_ABORTED, 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci /* 87654 13662306a36Sopenharmony_ci * 00000: no network possible 13762306a36Sopenharmony_ci * 00001: to be send 13862306a36Sopenharmony_ci * 00011: to be send, on worker queue 13962306a36Sopenharmony_ci * 00101: sent, expecting recv_ack (B) or write_ack (C) 14062306a36Sopenharmony_ci * 11101: sent, 14162306a36Sopenharmony_ci * recv_ack (B) or implicit "ack" (A), 14262306a36Sopenharmony_ci * still waiting for the barrier ack. 14362306a36Sopenharmony_ci * master_bio may already be completed and invalidated. 14462306a36Sopenharmony_ci * 11100: write acked (C), 14562306a36Sopenharmony_ci * data received (for remote read, any protocol) 14662306a36Sopenharmony_ci * or finally the barrier ack has arrived (B,A)... 14762306a36Sopenharmony_ci * request can be freed 14862306a36Sopenharmony_ci * 01100: neg-acked (write, protocol C) 14962306a36Sopenharmony_ci * or neg-d-acked (read, any protocol) 15062306a36Sopenharmony_ci * or killed from the transfer log 15162306a36Sopenharmony_ci * during cleanup after connection loss 15262306a36Sopenharmony_ci * request can be freed 15362306a36Sopenharmony_ci * 01000: canceled or send failed... 15462306a36Sopenharmony_ci * request can be freed 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* if "SENT" is not set, yet, this can still fail or be canceled. 15862306a36Sopenharmony_ci * if "SENT" is set already, we still wait for an Ack packet. 15962306a36Sopenharmony_ci * when cleared, the master_bio may be completed. 16062306a36Sopenharmony_ci * in (B,A) the request object may still linger on the transaction log 16162306a36Sopenharmony_ci * until the corresponding barrier ack comes in */ 16262306a36Sopenharmony_ci __RQ_NET_PENDING, 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci /* If it is QUEUED, and it is a WRITE, it is also registered in the 16562306a36Sopenharmony_ci * transfer log. Currently we need this flag to avoid conflicts between 16662306a36Sopenharmony_ci * worker canceling the request and tl_clear_barrier killing it from 16762306a36Sopenharmony_ci * transfer log. We should restructure the code so this conflict does 16862306a36Sopenharmony_ci * no longer occur. */ 16962306a36Sopenharmony_ci __RQ_NET_QUEUED, 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci /* well, actually only "handed over to the network stack". 17262306a36Sopenharmony_ci * 17362306a36Sopenharmony_ci * TODO can potentially be dropped because of the similar meaning 17462306a36Sopenharmony_ci * of RQ_NET_SENT and ~RQ_NET_QUEUED. 17562306a36Sopenharmony_ci * however it is not exactly the same. before we drop it 17662306a36Sopenharmony_ci * we must ensure that we can tell a request with network part 17762306a36Sopenharmony_ci * from a request without, regardless of what happens to it. */ 17862306a36Sopenharmony_ci __RQ_NET_SENT, 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci /* when set, the request may be freed (if RQ_NET_QUEUED is clear). 18162306a36Sopenharmony_ci * basically this means the corresponding P_BARRIER_ACK was received */ 18262306a36Sopenharmony_ci __RQ_NET_DONE, 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci /* whether or not we know (C) or pretend (B,A) that the write 18562306a36Sopenharmony_ci * was successfully written on the peer. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_ci __RQ_NET_OK, 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci /* peer called drbd_set_in_sync() for this write */ 19062306a36Sopenharmony_ci __RQ_NET_SIS, 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci /* keep this last, its for the RQ_NET_MASK */ 19362306a36Sopenharmony_ci __RQ_NET_MAX, 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci /* Set when this is a write, clear for a read */ 19662306a36Sopenharmony_ci __RQ_WRITE, 19762306a36Sopenharmony_ci __RQ_WSAME, 19862306a36Sopenharmony_ci __RQ_UNMAP, 19962306a36Sopenharmony_ci __RQ_ZEROES, 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci /* Should call drbd_al_complete_io() for this request... */ 20262306a36Sopenharmony_ci __RQ_IN_ACT_LOG, 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* This was the most recent request during some blk_finish_plug() 20562306a36Sopenharmony_ci * or its implicit from-schedule equivalent. 20662306a36Sopenharmony_ci * We may use it as hint to send a P_UNPLUG_REMOTE */ 20762306a36Sopenharmony_ci __RQ_UNPLUG, 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci /* The peer has sent a retry ACK */ 21062306a36Sopenharmony_ci __RQ_POSTPONED, 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci /* would have been completed, 21362306a36Sopenharmony_ci * but was not, because of drbd_suspended() */ 21462306a36Sopenharmony_ci __RQ_COMPLETION_SUSP, 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci /* We expect a receive ACK (wire proto B) */ 21762306a36Sopenharmony_ci __RQ_EXP_RECEIVE_ACK, 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci /* We expect a write ACK (wite proto C) */ 22062306a36Sopenharmony_ci __RQ_EXP_WRITE_ACK, 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* waiting for a barrier ack, did an extra kref_get */ 22362306a36Sopenharmony_ci __RQ_EXP_BARR_ACK, 22462306a36Sopenharmony_ci}; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) 22762306a36Sopenharmony_ci#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) 22862306a36Sopenharmony_ci#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) 22962306a36Sopenharmony_ci#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) 23462306a36Sopenharmony_ci#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) 23562306a36Sopenharmony_ci#define RQ_NET_SENT (1UL << __RQ_NET_SENT) 23662306a36Sopenharmony_ci#define RQ_NET_DONE (1UL << __RQ_NET_DONE) 23762306a36Sopenharmony_ci#define RQ_NET_OK (1UL << __RQ_NET_OK) 23862306a36Sopenharmony_ci#define RQ_NET_SIS (1UL << __RQ_NET_SIS) 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci#define RQ_WRITE (1UL << __RQ_WRITE) 24362306a36Sopenharmony_ci#define RQ_WSAME (1UL << __RQ_WSAME) 24462306a36Sopenharmony_ci#define RQ_UNMAP (1UL << __RQ_UNMAP) 24562306a36Sopenharmony_ci#define RQ_ZEROES (1UL << __RQ_ZEROES) 24662306a36Sopenharmony_ci#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) 24762306a36Sopenharmony_ci#define RQ_UNPLUG (1UL << __RQ_UNPLUG) 24862306a36Sopenharmony_ci#define RQ_POSTPONED (1UL << __RQ_POSTPONED) 24962306a36Sopenharmony_ci#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) 25062306a36Sopenharmony_ci#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) 25162306a36Sopenharmony_ci#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK) 25262306a36Sopenharmony_ci#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK) 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci/* For waking up the frozen transfer log mod_req() has to return if the request 25562306a36Sopenharmony_ci should be counted in the epoch object*/ 25662306a36Sopenharmony_ci#define MR_WRITE 1 25762306a36Sopenharmony_ci#define MR_READ 2 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci/* Short lived temporary struct on the stack. 26062306a36Sopenharmony_ci * We could squirrel the error to be returned into 26162306a36Sopenharmony_ci * bio->bi_iter.bi_size, or similar. But that would be too ugly. */ 26262306a36Sopenharmony_cistruct bio_and_error { 26362306a36Sopenharmony_ci struct bio *bio; 26462306a36Sopenharmony_ci int error; 26562306a36Sopenharmony_ci}; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ciextern void start_new_tl_epoch(struct drbd_connection *connection); 26862306a36Sopenharmony_ciextern void drbd_req_destroy(struct kref *kref); 26962306a36Sopenharmony_ciextern int __req_mod(struct drbd_request *req, enum drbd_req_event what, 27062306a36Sopenharmony_ci struct drbd_peer_device *peer_device, 27162306a36Sopenharmony_ci struct bio_and_error *m); 27262306a36Sopenharmony_ciextern void complete_master_bio(struct drbd_device *device, 27362306a36Sopenharmony_ci struct bio_and_error *m); 27462306a36Sopenharmony_ciextern void request_timer_fn(struct timer_list *t); 27562306a36Sopenharmony_ciextern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); 27662306a36Sopenharmony_ciextern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); 27762306a36Sopenharmony_ciextern void tl_abort_disk_io(struct drbd_device *device); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci/* this is in drbd_main.c */ 28062306a36Sopenharmony_ciextern void drbd_restart_request(struct drbd_request *req); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci/* use this if you don't want to deal with calling complete_master_bio() 28362306a36Sopenharmony_ci * outside the spinlock, e.g. when walking some list on cleanup. */ 28462306a36Sopenharmony_cistatic inline int _req_mod(struct drbd_request *req, enum drbd_req_event what, 28562306a36Sopenharmony_ci struct drbd_peer_device *peer_device) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci struct drbd_device *device = req->device; 28862306a36Sopenharmony_ci struct bio_and_error m; 28962306a36Sopenharmony_ci int rv; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci /* __req_mod possibly frees req, do not touch req after that! */ 29262306a36Sopenharmony_ci rv = __req_mod(req, what, peer_device, &m); 29362306a36Sopenharmony_ci if (m.bio) 29462306a36Sopenharmony_ci complete_master_bio(device, &m); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci return rv; 29762306a36Sopenharmony_ci} 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci/* completion of master bio is outside of our spinlock. 30062306a36Sopenharmony_ci * We still may or may not be inside some irqs disabled section 30162306a36Sopenharmony_ci * of the lower level driver completion callback, so we need to 30262306a36Sopenharmony_ci * spin_lock_irqsave here. */ 30362306a36Sopenharmony_cistatic inline int req_mod(struct drbd_request *req, 30462306a36Sopenharmony_ci enum drbd_req_event what, 30562306a36Sopenharmony_ci struct drbd_peer_device *peer_device) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci unsigned long flags; 30862306a36Sopenharmony_ci struct drbd_device *device = req->device; 30962306a36Sopenharmony_ci struct bio_and_error m; 31062306a36Sopenharmony_ci int rv; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci spin_lock_irqsave(&device->resource->req_lock, flags); 31362306a36Sopenharmony_ci rv = __req_mod(req, what, peer_device, &m); 31462306a36Sopenharmony_ci spin_unlock_irqrestore(&device->resource->req_lock, flags); 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (m.bio) 31762306a36Sopenharmony_ci complete_master_bio(device, &m); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci return rv; 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ciextern bool drbd_should_do_remote(union drbd_dev_state); 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci#endif 325