162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci drbd.c 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 862306a36Sopenharmony_ci Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 962306a36Sopenharmony_ci Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev 1262306a36Sopenharmony_ci from Logicworks, Inc. for making SDP replication support possible. 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci */ 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include <linux/module.h> 2062306a36Sopenharmony_ci#include <linux/jiffies.h> 2162306a36Sopenharmony_ci#include <linux/drbd.h> 2262306a36Sopenharmony_ci#include <linux/uaccess.h> 2362306a36Sopenharmony_ci#include <asm/types.h> 2462306a36Sopenharmony_ci#include <net/sock.h> 2562306a36Sopenharmony_ci#include <linux/ctype.h> 2662306a36Sopenharmony_ci#include <linux/mutex.h> 2762306a36Sopenharmony_ci#include <linux/fs.h> 2862306a36Sopenharmony_ci#include <linux/file.h> 2962306a36Sopenharmony_ci#include <linux/proc_fs.h> 3062306a36Sopenharmony_ci#include <linux/init.h> 3162306a36Sopenharmony_ci#include <linux/mm.h> 3262306a36Sopenharmony_ci#include <linux/memcontrol.h> 3362306a36Sopenharmony_ci#include <linux/mm_inline.h> 3462306a36Sopenharmony_ci#include <linux/slab.h> 3562306a36Sopenharmony_ci#include <linux/random.h> 3662306a36Sopenharmony_ci#include <linux/reboot.h> 3762306a36Sopenharmony_ci#include <linux/notifier.h> 3862306a36Sopenharmony_ci#include <linux/kthread.h> 3962306a36Sopenharmony_ci#include <linux/workqueue.h> 4062306a36Sopenharmony_ci#include <linux/unistd.h> 4162306a36Sopenharmony_ci#include <linux/vmalloc.h> 4262306a36Sopenharmony_ci#include <linux/sched/signal.h> 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#include <linux/drbd_limits.h> 4562306a36Sopenharmony_ci#include "drbd_int.h" 4662306a36Sopenharmony_ci#include "drbd_protocol.h" 4762306a36Sopenharmony_ci#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ 4862306a36Sopenharmony_ci#include "drbd_vli.h" 4962306a36Sopenharmony_ci#include "drbd_debugfs.h" 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic DEFINE_MUTEX(drbd_main_mutex); 5262306a36Sopenharmony_cistatic int drbd_open(struct gendisk *disk, blk_mode_t mode); 5362306a36Sopenharmony_cistatic void drbd_release(struct gendisk *gd); 5462306a36Sopenharmony_cistatic void md_sync_timer_fn(struct timer_list *t); 5562306a36Sopenharmony_cistatic int w_bitmap_io(struct drbd_work *w, int unused); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ciMODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 5862306a36Sopenharmony_ci "Lars Ellenberg <lars@linbit.com>"); 5962306a36Sopenharmony_ciMODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); 6062306a36Sopenharmony_ciMODULE_VERSION(REL_VERSION); 6162306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 6262306a36Sopenharmony_ciMODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" 6362306a36Sopenharmony_ci __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); 6462306a36Sopenharmony_ciMODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci#include <linux/moduleparam.h> 6762306a36Sopenharmony_ci/* thanks to these macros, if compiled into the kernel (not-module), 6862306a36Sopenharmony_ci * these become boot parameters (e.g., drbd.minor_count) */ 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci#ifdef CONFIG_DRBD_FAULT_INJECTION 7162306a36Sopenharmony_ciint drbd_enable_faults; 7262306a36Sopenharmony_ciint drbd_fault_rate; 7362306a36Sopenharmony_cistatic int drbd_fault_count; 7462306a36Sopenharmony_cistatic int drbd_fault_devs; 7562306a36Sopenharmony_ci/* bitmap of enabled faults */ 7662306a36Sopenharmony_cimodule_param_named(enable_faults, drbd_enable_faults, int, 0664); 7762306a36Sopenharmony_ci/* fault rate % value - applies to all enabled faults */ 7862306a36Sopenharmony_cimodule_param_named(fault_rate, drbd_fault_rate, int, 0664); 7962306a36Sopenharmony_ci/* count of faults inserted */ 8062306a36Sopenharmony_cimodule_param_named(fault_count, drbd_fault_count, int, 0664); 8162306a36Sopenharmony_ci/* bitmap of devices to insert faults on */ 8262306a36Sopenharmony_cimodule_param_named(fault_devs, drbd_fault_devs, int, 0644); 8362306a36Sopenharmony_ci#endif 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci/* module parameters we can keep static */ 8662306a36Sopenharmony_cistatic bool drbd_allow_oos; /* allow_open_on_secondary */ 8762306a36Sopenharmony_cistatic bool drbd_disable_sendpage; 8862306a36Sopenharmony_ciMODULE_PARM_DESC(allow_oos, "DONT USE!"); 8962306a36Sopenharmony_cimodule_param_named(allow_oos, drbd_allow_oos, bool, 0); 9062306a36Sopenharmony_cimodule_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* module parameters we share */ 9362306a36Sopenharmony_ciint drbd_proc_details; /* Detail level in proc drbd*/ 9462306a36Sopenharmony_cimodule_param_named(proc_details, drbd_proc_details, int, 0644); 9562306a36Sopenharmony_ci/* module parameters shared with defaults */ 9662306a36Sopenharmony_ciunsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF; 9762306a36Sopenharmony_ci/* Module parameter for setting the user mode helper program 9862306a36Sopenharmony_ci * to run. Default is /sbin/drbdadm */ 9962306a36Sopenharmony_cichar drbd_usermode_helper[80] = "/sbin/drbdadm"; 10062306a36Sopenharmony_cimodule_param_named(minor_count, drbd_minor_count, uint, 0444); 10162306a36Sopenharmony_cimodule_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci/* in 2.6.x, our device mapping and config info contains our virtual gendisks 10462306a36Sopenharmony_ci * as member "struct gendisk *vdisk;" 10562306a36Sopenharmony_ci */ 10662306a36Sopenharmony_cistruct idr drbd_devices; 10762306a36Sopenharmony_cistruct list_head drbd_resources; 10862306a36Sopenharmony_cistruct mutex resources_mutex; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_cistruct kmem_cache *drbd_request_cache; 11162306a36Sopenharmony_cistruct kmem_cache *drbd_ee_cache; /* peer requests */ 11262306a36Sopenharmony_cistruct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ 11362306a36Sopenharmony_cistruct kmem_cache *drbd_al_ext_cache; /* activity log extents */ 11462306a36Sopenharmony_cimempool_t drbd_request_mempool; 11562306a36Sopenharmony_cimempool_t drbd_ee_mempool; 11662306a36Sopenharmony_cimempool_t drbd_md_io_page_pool; 11762306a36Sopenharmony_cistruct bio_set drbd_md_io_bio_set; 11862306a36Sopenharmony_cistruct bio_set drbd_io_bio_set; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* I do not use a standard mempool, because: 12162306a36Sopenharmony_ci 1) I want to hand out the pre-allocated objects first. 12262306a36Sopenharmony_ci 2) I want to be able to interrupt sleeping allocation with a signal. 12362306a36Sopenharmony_ci Note: This is a single linked list, the next pointer is the private 12462306a36Sopenharmony_ci member of struct page. 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_cistruct page *drbd_pp_pool; 12762306a36Sopenharmony_ciDEFINE_SPINLOCK(drbd_pp_lock); 12862306a36Sopenharmony_ciint drbd_pp_vacant; 12962306a36Sopenharmony_ciwait_queue_head_t drbd_pp_wait; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ciDEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic const struct block_device_operations drbd_ops = { 13462306a36Sopenharmony_ci .owner = THIS_MODULE, 13562306a36Sopenharmony_ci .submit_bio = drbd_submit_bio, 13662306a36Sopenharmony_ci .open = drbd_open, 13762306a36Sopenharmony_ci .release = drbd_release, 13862306a36Sopenharmony_ci}; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci#ifdef __CHECKER__ 14162306a36Sopenharmony_ci/* When checking with sparse, and this is an inline function, sparse will 14262306a36Sopenharmony_ci give tons of false positives. When this is a real functions sparse works. 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_ciint _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci int io_allowed; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci atomic_inc(&device->local_cnt); 14962306a36Sopenharmony_ci io_allowed = (device->state.disk >= mins); 15062306a36Sopenharmony_ci if (!io_allowed) { 15162306a36Sopenharmony_ci if (atomic_dec_and_test(&device->local_cnt)) 15262306a36Sopenharmony_ci wake_up(&device->misc_wait); 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci return io_allowed; 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci#endif 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci/** 16062306a36Sopenharmony_ci * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch 16162306a36Sopenharmony_ci * @connection: DRBD connection. 16262306a36Sopenharmony_ci * @barrier_nr: Expected identifier of the DRBD write barrier packet. 16362306a36Sopenharmony_ci * @set_size: Expected number of requests before that barrier. 16462306a36Sopenharmony_ci * 16562306a36Sopenharmony_ci * In case the passed barrier_nr or set_size does not match the oldest 16662306a36Sopenharmony_ci * epoch of not yet barrier-acked requests, this function will cause a 16762306a36Sopenharmony_ci * termination of the connection. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_civoid tl_release(struct drbd_connection *connection, unsigned int barrier_nr, 17062306a36Sopenharmony_ci unsigned int set_size) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci struct drbd_request *r; 17362306a36Sopenharmony_ci struct drbd_request *req = NULL, *tmp = NULL; 17462306a36Sopenharmony_ci int expect_epoch = 0; 17562306a36Sopenharmony_ci int expect_size = 0; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci /* find oldest not yet barrier-acked write request, 18062306a36Sopenharmony_ci * count writes in its epoch. */ 18162306a36Sopenharmony_ci list_for_each_entry(r, &connection->transfer_log, tl_requests) { 18262306a36Sopenharmony_ci const unsigned s = r->rq_state; 18362306a36Sopenharmony_ci if (!req) { 18462306a36Sopenharmony_ci if (!(s & RQ_WRITE)) 18562306a36Sopenharmony_ci continue; 18662306a36Sopenharmony_ci if (!(s & RQ_NET_MASK)) 18762306a36Sopenharmony_ci continue; 18862306a36Sopenharmony_ci if (s & RQ_NET_DONE) 18962306a36Sopenharmony_ci continue; 19062306a36Sopenharmony_ci req = r; 19162306a36Sopenharmony_ci expect_epoch = req->epoch; 19262306a36Sopenharmony_ci expect_size ++; 19362306a36Sopenharmony_ci } else { 19462306a36Sopenharmony_ci if (r->epoch != expect_epoch) 19562306a36Sopenharmony_ci break; 19662306a36Sopenharmony_ci if (!(s & RQ_WRITE)) 19762306a36Sopenharmony_ci continue; 19862306a36Sopenharmony_ci /* if (s & RQ_DONE): not expected */ 19962306a36Sopenharmony_ci /* if (!(s & RQ_NET_MASK)): not expected */ 20062306a36Sopenharmony_ci expect_size++; 20162306a36Sopenharmony_ci } 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* first some paranoia code */ 20562306a36Sopenharmony_ci if (req == NULL) { 20662306a36Sopenharmony_ci drbd_err(connection, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", 20762306a36Sopenharmony_ci barrier_nr); 20862306a36Sopenharmony_ci goto bail; 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci if (expect_epoch != barrier_nr) { 21162306a36Sopenharmony_ci drbd_err(connection, "BAD! BarrierAck #%u received, expected #%u!\n", 21262306a36Sopenharmony_ci barrier_nr, expect_epoch); 21362306a36Sopenharmony_ci goto bail; 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (expect_size != set_size) { 21762306a36Sopenharmony_ci drbd_err(connection, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", 21862306a36Sopenharmony_ci barrier_nr, set_size, expect_size); 21962306a36Sopenharmony_ci goto bail; 22062306a36Sopenharmony_ci } 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* Clean up list of requests processed during current epoch. */ 22362306a36Sopenharmony_ci /* this extra list walk restart is paranoia, 22462306a36Sopenharmony_ci * to catch requests being barrier-acked "unexpectedly". 22562306a36Sopenharmony_ci * It usually should find the same req again, or some READ preceding it. */ 22662306a36Sopenharmony_ci list_for_each_entry(req, &connection->transfer_log, tl_requests) 22762306a36Sopenharmony_ci if (req->epoch == expect_epoch) { 22862306a36Sopenharmony_ci tmp = req; 22962306a36Sopenharmony_ci break; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); 23262306a36Sopenharmony_ci list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { 23362306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 23462306a36Sopenharmony_ci if (req->epoch != expect_epoch) 23562306a36Sopenharmony_ci break; 23662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, req->device->vnr); 23762306a36Sopenharmony_ci _req_mod(req, BARRIER_ACKED, peer_device); 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci return; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cibail: 24462306a36Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 24562306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci/** 25062306a36Sopenharmony_ci * _tl_restart() - Walks the transfer log, and applies an action to all requests 25162306a36Sopenharmony_ci * @connection: DRBD connection to operate on. 25262306a36Sopenharmony_ci * @what: The action/event to perform with all request objects 25362306a36Sopenharmony_ci * 25462306a36Sopenharmony_ci * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, 25562306a36Sopenharmony_ci * RESTART_FROZEN_DISK_IO. 25662306a36Sopenharmony_ci */ 25762306a36Sopenharmony_ci/* must hold resource->req_lock */ 25862306a36Sopenharmony_civoid _tl_restart(struct drbd_connection *connection, enum drbd_req_event what) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 26162306a36Sopenharmony_ci struct drbd_request *req, *r; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { 26462306a36Sopenharmony_ci peer_device = conn_peer_device(connection, req->device->vnr); 26562306a36Sopenharmony_ci _req_mod(req, what, peer_device); 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_civoid tl_restart(struct drbd_connection *connection, enum drbd_req_event what) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 27262306a36Sopenharmony_ci _tl_restart(connection, what); 27362306a36Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci/** 27762306a36Sopenharmony_ci * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL 27862306a36Sopenharmony_ci * @connection: DRBD connection. 27962306a36Sopenharmony_ci * 28062306a36Sopenharmony_ci * This is called after the connection to the peer was lost. The storage covered 28162306a36Sopenharmony_ci * by the requests on the transfer gets marked as our of sync. Called from the 28262306a36Sopenharmony_ci * receiver thread and the worker thread. 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_civoid tl_clear(struct drbd_connection *connection) 28562306a36Sopenharmony_ci{ 28662306a36Sopenharmony_ci tl_restart(connection, CONNECTION_LOST_WHILE_PENDING); 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci/** 29062306a36Sopenharmony_ci * tl_abort_disk_io() - Abort disk I/O for all requests for a certain device in the TL 29162306a36Sopenharmony_ci * @device: DRBD device. 29262306a36Sopenharmony_ci */ 29362306a36Sopenharmony_civoid tl_abort_disk_io(struct drbd_device *device) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci struct drbd_connection *connection = first_peer_device(device)->connection; 29662306a36Sopenharmony_ci struct drbd_request *req, *r; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 29962306a36Sopenharmony_ci list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { 30062306a36Sopenharmony_ci if (!(req->rq_state & RQ_LOCAL_PENDING)) 30162306a36Sopenharmony_ci continue; 30262306a36Sopenharmony_ci if (req->device != device) 30362306a36Sopenharmony_ci continue; 30462306a36Sopenharmony_ci _req_mod(req, ABORT_DISK_IO, NULL); 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 30762306a36Sopenharmony_ci} 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_cistatic int drbd_thread_setup(void *arg) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci struct drbd_thread *thi = (struct drbd_thread *) arg; 31262306a36Sopenharmony_ci struct drbd_resource *resource = thi->resource; 31362306a36Sopenharmony_ci unsigned long flags; 31462306a36Sopenharmony_ci int retval; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", 31762306a36Sopenharmony_ci thi->name[0], 31862306a36Sopenharmony_ci resource->name); 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci allow_kernel_signal(DRBD_SIGKILL); 32162306a36Sopenharmony_ci allow_kernel_signal(SIGXCPU); 32262306a36Sopenharmony_cirestart: 32362306a36Sopenharmony_ci retval = thi->function(thi); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci spin_lock_irqsave(&thi->t_lock, flags); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci /* if the receiver has been "EXITING", the last thing it did 32862306a36Sopenharmony_ci * was set the conn state to "StandAlone", 32962306a36Sopenharmony_ci * if now a re-connect request comes in, conn state goes C_UNCONNECTED, 33062306a36Sopenharmony_ci * and receiver thread will be "started". 33162306a36Sopenharmony_ci * drbd_thread_start needs to set "RESTARTING" in that case. 33262306a36Sopenharmony_ci * t_state check and assignment needs to be within the same spinlock, 33362306a36Sopenharmony_ci * so either thread_start sees EXITING, and can remap to RESTARTING, 33462306a36Sopenharmony_ci * or thread_start see NONE, and can proceed as normal. 33562306a36Sopenharmony_ci */ 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci if (thi->t_state == RESTARTING) { 33862306a36Sopenharmony_ci drbd_info(resource, "Restarting %s thread\n", thi->name); 33962306a36Sopenharmony_ci thi->t_state = RUNNING; 34062306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 34162306a36Sopenharmony_ci goto restart; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci thi->task = NULL; 34562306a36Sopenharmony_ci thi->t_state = NONE; 34662306a36Sopenharmony_ci smp_mb(); 34762306a36Sopenharmony_ci complete_all(&thi->stop); 34862306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci drbd_info(resource, "Terminating %s\n", current->comm); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci /* Release mod reference taken when thread was started */ 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (thi->connection) 35562306a36Sopenharmony_ci kref_put(&thi->connection->kref, drbd_destroy_connection); 35662306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 35762306a36Sopenharmony_ci module_put(THIS_MODULE); 35862306a36Sopenharmony_ci return retval; 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cistatic void drbd_thread_init(struct drbd_resource *resource, struct drbd_thread *thi, 36262306a36Sopenharmony_ci int (*func) (struct drbd_thread *), const char *name) 36362306a36Sopenharmony_ci{ 36462306a36Sopenharmony_ci spin_lock_init(&thi->t_lock); 36562306a36Sopenharmony_ci thi->task = NULL; 36662306a36Sopenharmony_ci thi->t_state = NONE; 36762306a36Sopenharmony_ci thi->function = func; 36862306a36Sopenharmony_ci thi->resource = resource; 36962306a36Sopenharmony_ci thi->connection = NULL; 37062306a36Sopenharmony_ci thi->name = name; 37162306a36Sopenharmony_ci} 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ciint drbd_thread_start(struct drbd_thread *thi) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci struct drbd_resource *resource = thi->resource; 37662306a36Sopenharmony_ci struct task_struct *nt; 37762306a36Sopenharmony_ci unsigned long flags; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci /* is used from state engine doing drbd_thread_stop_nowait, 38062306a36Sopenharmony_ci * while holding the req lock irqsave */ 38162306a36Sopenharmony_ci spin_lock_irqsave(&thi->t_lock, flags); 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci switch (thi->t_state) { 38462306a36Sopenharmony_ci case NONE: 38562306a36Sopenharmony_ci drbd_info(resource, "Starting %s thread (from %s [%d])\n", 38662306a36Sopenharmony_ci thi->name, current->comm, current->pid); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci /* Get ref on module for thread - this is released when thread exits */ 38962306a36Sopenharmony_ci if (!try_module_get(THIS_MODULE)) { 39062306a36Sopenharmony_ci drbd_err(resource, "Failed to get module reference in drbd_thread_start\n"); 39162306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 39262306a36Sopenharmony_ci return false; 39362306a36Sopenharmony_ci } 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci kref_get(&resource->kref); 39662306a36Sopenharmony_ci if (thi->connection) 39762306a36Sopenharmony_ci kref_get(&thi->connection->kref); 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci init_completion(&thi->stop); 40062306a36Sopenharmony_ci thi->reset_cpu_mask = 1; 40162306a36Sopenharmony_ci thi->t_state = RUNNING; 40262306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 40362306a36Sopenharmony_ci flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci nt = kthread_create(drbd_thread_setup, (void *) thi, 40662306a36Sopenharmony_ci "drbd_%c_%s", thi->name[0], thi->resource->name); 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci if (IS_ERR(nt)) { 40962306a36Sopenharmony_ci drbd_err(resource, "Couldn't start thread\n"); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci if (thi->connection) 41262306a36Sopenharmony_ci kref_put(&thi->connection->kref, drbd_destroy_connection); 41362306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 41462306a36Sopenharmony_ci module_put(THIS_MODULE); 41562306a36Sopenharmony_ci return false; 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci spin_lock_irqsave(&thi->t_lock, flags); 41862306a36Sopenharmony_ci thi->task = nt; 41962306a36Sopenharmony_ci thi->t_state = RUNNING; 42062306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 42162306a36Sopenharmony_ci wake_up_process(nt); 42262306a36Sopenharmony_ci break; 42362306a36Sopenharmony_ci case EXITING: 42462306a36Sopenharmony_ci thi->t_state = RESTARTING; 42562306a36Sopenharmony_ci drbd_info(resource, "Restarting %s thread (from %s [%d])\n", 42662306a36Sopenharmony_ci thi->name, current->comm, current->pid); 42762306a36Sopenharmony_ci fallthrough; 42862306a36Sopenharmony_ci case RUNNING: 42962306a36Sopenharmony_ci case RESTARTING: 43062306a36Sopenharmony_ci default: 43162306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 43262306a36Sopenharmony_ci break; 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci return true; 43662306a36Sopenharmony_ci} 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_civoid _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) 44062306a36Sopenharmony_ci{ 44162306a36Sopenharmony_ci unsigned long flags; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci enum drbd_thread_state ns = restart ? RESTARTING : EXITING; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci /* may be called from state engine, holding the req lock irqsave */ 44662306a36Sopenharmony_ci spin_lock_irqsave(&thi->t_lock, flags); 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci if (thi->t_state == NONE) { 44962306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 45062306a36Sopenharmony_ci if (restart) 45162306a36Sopenharmony_ci drbd_thread_start(thi); 45262306a36Sopenharmony_ci return; 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci if (thi->t_state != ns) { 45662306a36Sopenharmony_ci if (thi->task == NULL) { 45762306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 45862306a36Sopenharmony_ci return; 45962306a36Sopenharmony_ci } 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci thi->t_state = ns; 46262306a36Sopenharmony_ci smp_mb(); 46362306a36Sopenharmony_ci init_completion(&thi->stop); 46462306a36Sopenharmony_ci if (thi->task != current) 46562306a36Sopenharmony_ci send_sig(DRBD_SIGKILL, thi->task, 1); 46662306a36Sopenharmony_ci } 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci spin_unlock_irqrestore(&thi->t_lock, flags); 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (wait) 47162306a36Sopenharmony_ci wait_for_completion(&thi->stop); 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ciint conn_lowest_minor(struct drbd_connection *connection) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 47762306a36Sopenharmony_ci int vnr = 0, minor = -1; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci rcu_read_lock(); 48062306a36Sopenharmony_ci peer_device = idr_get_next(&connection->peer_devices, &vnr); 48162306a36Sopenharmony_ci if (peer_device) 48262306a36Sopenharmony_ci minor = device_to_minor(peer_device->device); 48362306a36Sopenharmony_ci rcu_read_unlock(); 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci return minor; 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci#ifdef CONFIG_SMP 48962306a36Sopenharmony_ci/* 49062306a36Sopenharmony_ci * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs 49162306a36Sopenharmony_ci * 49262306a36Sopenharmony_ci * Forces all threads of a resource onto the same CPU. This is beneficial for 49362306a36Sopenharmony_ci * DRBD's performance. May be overwritten by user's configuration. 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_cistatic void drbd_calc_cpu_mask(cpumask_var_t *cpu_mask) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci unsigned int *resources_per_cpu, min_index = ~0; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci resources_per_cpu = kcalloc(nr_cpu_ids, sizeof(*resources_per_cpu), 50062306a36Sopenharmony_ci GFP_KERNEL); 50162306a36Sopenharmony_ci if (resources_per_cpu) { 50262306a36Sopenharmony_ci struct drbd_resource *resource; 50362306a36Sopenharmony_ci unsigned int cpu, min = ~0; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci rcu_read_lock(); 50662306a36Sopenharmony_ci for_each_resource_rcu(resource, &drbd_resources) { 50762306a36Sopenharmony_ci for_each_cpu(cpu, resource->cpu_mask) 50862306a36Sopenharmony_ci resources_per_cpu[cpu]++; 50962306a36Sopenharmony_ci } 51062306a36Sopenharmony_ci rcu_read_unlock(); 51162306a36Sopenharmony_ci for_each_online_cpu(cpu) { 51262306a36Sopenharmony_ci if (resources_per_cpu[cpu] < min) { 51362306a36Sopenharmony_ci min = resources_per_cpu[cpu]; 51462306a36Sopenharmony_ci min_index = cpu; 51562306a36Sopenharmony_ci } 51662306a36Sopenharmony_ci } 51762306a36Sopenharmony_ci kfree(resources_per_cpu); 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci if (min_index == ~0) { 52062306a36Sopenharmony_ci cpumask_setall(*cpu_mask); 52162306a36Sopenharmony_ci return; 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci cpumask_set_cpu(min_index, *cpu_mask); 52462306a36Sopenharmony_ci} 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci/** 52762306a36Sopenharmony_ci * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread 52862306a36Sopenharmony_ci * @thi: drbd_thread object 52962306a36Sopenharmony_ci * 53062306a36Sopenharmony_ci * call in the "main loop" of _all_ threads, no need for any mutex, current won't die 53162306a36Sopenharmony_ci * prematurely. 53262306a36Sopenharmony_ci */ 53362306a36Sopenharmony_civoid drbd_thread_current_set_cpu(struct drbd_thread *thi) 53462306a36Sopenharmony_ci{ 53562306a36Sopenharmony_ci struct drbd_resource *resource = thi->resource; 53662306a36Sopenharmony_ci struct task_struct *p = current; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci if (!thi->reset_cpu_mask) 53962306a36Sopenharmony_ci return; 54062306a36Sopenharmony_ci thi->reset_cpu_mask = 0; 54162306a36Sopenharmony_ci set_cpus_allowed_ptr(p, resource->cpu_mask); 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci#else 54462306a36Sopenharmony_ci#define drbd_calc_cpu_mask(A) ({}) 54562306a36Sopenharmony_ci#endif 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci/* 54862306a36Sopenharmony_ci * drbd_header_size - size of a packet header 54962306a36Sopenharmony_ci * 55062306a36Sopenharmony_ci * The header size is a multiple of 8, so any payload following the header is 55162306a36Sopenharmony_ci * word aligned on 64-bit architectures. (The bitmap send and receive code 55262306a36Sopenharmony_ci * relies on this.) 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_ciunsigned int drbd_header_size(struct drbd_connection *connection) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci if (connection->agreed_pro_version >= 100) { 55762306a36Sopenharmony_ci BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); 55862306a36Sopenharmony_ci return sizeof(struct p_header100); 55962306a36Sopenharmony_ci } else { 56062306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct p_header80) != 56162306a36Sopenharmony_ci sizeof(struct p_header95)); 56262306a36Sopenharmony_ci BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); 56362306a36Sopenharmony_ci return sizeof(struct p_header80); 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistatic unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci h->magic = cpu_to_be32(DRBD_MAGIC); 57062306a36Sopenharmony_ci h->command = cpu_to_be16(cmd); 57162306a36Sopenharmony_ci h->length = cpu_to_be16(size); 57262306a36Sopenharmony_ci return sizeof(struct p_header80); 57362306a36Sopenharmony_ci} 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_cistatic unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci h->magic = cpu_to_be16(DRBD_MAGIC_BIG); 57862306a36Sopenharmony_ci h->command = cpu_to_be16(cmd); 57962306a36Sopenharmony_ci h->length = cpu_to_be32(size); 58062306a36Sopenharmony_ci return sizeof(struct p_header95); 58162306a36Sopenharmony_ci} 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_cistatic unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, 58462306a36Sopenharmony_ci int size, int vnr) 58562306a36Sopenharmony_ci{ 58662306a36Sopenharmony_ci h->magic = cpu_to_be32(DRBD_MAGIC_100); 58762306a36Sopenharmony_ci h->volume = cpu_to_be16(vnr); 58862306a36Sopenharmony_ci h->command = cpu_to_be16(cmd); 58962306a36Sopenharmony_ci h->length = cpu_to_be32(size); 59062306a36Sopenharmony_ci h->pad = 0; 59162306a36Sopenharmony_ci return sizeof(struct p_header100); 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic unsigned int prepare_header(struct drbd_connection *connection, int vnr, 59562306a36Sopenharmony_ci void *buffer, enum drbd_packet cmd, int size) 59662306a36Sopenharmony_ci{ 59762306a36Sopenharmony_ci if (connection->agreed_pro_version >= 100) 59862306a36Sopenharmony_ci return prepare_header100(buffer, cmd, size, vnr); 59962306a36Sopenharmony_ci else if (connection->agreed_pro_version >= 95 && 60062306a36Sopenharmony_ci size > DRBD_MAX_SIZE_H80_PACKET) 60162306a36Sopenharmony_ci return prepare_header95(buffer, cmd, size); 60262306a36Sopenharmony_ci else 60362306a36Sopenharmony_ci return prepare_header80(buffer, cmd, size); 60462306a36Sopenharmony_ci} 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_cistatic void *__conn_prepare_command(struct drbd_connection *connection, 60762306a36Sopenharmony_ci struct drbd_socket *sock) 60862306a36Sopenharmony_ci{ 60962306a36Sopenharmony_ci if (!sock->socket) 61062306a36Sopenharmony_ci return NULL; 61162306a36Sopenharmony_ci return sock->sbuf + drbd_header_size(connection); 61262306a36Sopenharmony_ci} 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_civoid *conn_prepare_command(struct drbd_connection *connection, struct drbd_socket *sock) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci void *p; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci mutex_lock(&sock->mutex); 61962306a36Sopenharmony_ci p = __conn_prepare_command(connection, sock); 62062306a36Sopenharmony_ci if (!p) 62162306a36Sopenharmony_ci mutex_unlock(&sock->mutex); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci return p; 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_civoid *drbd_prepare_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock) 62762306a36Sopenharmony_ci{ 62862306a36Sopenharmony_ci return conn_prepare_command(peer_device->connection, sock); 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_cistatic int __send_command(struct drbd_connection *connection, int vnr, 63262306a36Sopenharmony_ci struct drbd_socket *sock, enum drbd_packet cmd, 63362306a36Sopenharmony_ci unsigned int header_size, void *data, 63462306a36Sopenharmony_ci unsigned int size) 63562306a36Sopenharmony_ci{ 63662306a36Sopenharmony_ci int msg_flags; 63762306a36Sopenharmony_ci int err; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci /* 64062306a36Sopenharmony_ci * Called with @data == NULL and the size of the data blocks in @size 64162306a36Sopenharmony_ci * for commands that send data blocks. For those commands, omit the 64262306a36Sopenharmony_ci * MSG_MORE flag: this will increase the likelihood that data blocks 64362306a36Sopenharmony_ci * which are page aligned on the sender will end up page aligned on the 64462306a36Sopenharmony_ci * receiver. 64562306a36Sopenharmony_ci */ 64662306a36Sopenharmony_ci msg_flags = data ? MSG_MORE : 0; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci header_size += prepare_header(connection, vnr, sock->sbuf, cmd, 64962306a36Sopenharmony_ci header_size + size); 65062306a36Sopenharmony_ci err = drbd_send_all(connection, sock->socket, sock->sbuf, header_size, 65162306a36Sopenharmony_ci msg_flags); 65262306a36Sopenharmony_ci if (data && !err) 65362306a36Sopenharmony_ci err = drbd_send_all(connection, sock->socket, data, size, 0); 65462306a36Sopenharmony_ci /* DRBD protocol "pings" are latency critical. 65562306a36Sopenharmony_ci * This is supposed to trigger tcp_push_pending_frames() */ 65662306a36Sopenharmony_ci if (!err && (cmd == P_PING || cmd == P_PING_ACK)) 65762306a36Sopenharmony_ci tcp_sock_set_nodelay(sock->socket->sk); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci return err; 66062306a36Sopenharmony_ci} 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_cistatic int __conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock, 66362306a36Sopenharmony_ci enum drbd_packet cmd, unsigned int header_size, 66462306a36Sopenharmony_ci void *data, unsigned int size) 66562306a36Sopenharmony_ci{ 66662306a36Sopenharmony_ci return __send_command(connection, 0, sock, cmd, header_size, data, size); 66762306a36Sopenharmony_ci} 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ciint conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock, 67062306a36Sopenharmony_ci enum drbd_packet cmd, unsigned int header_size, 67162306a36Sopenharmony_ci void *data, unsigned int size) 67262306a36Sopenharmony_ci{ 67362306a36Sopenharmony_ci int err; 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci err = __conn_send_command(connection, sock, cmd, header_size, data, size); 67662306a36Sopenharmony_ci mutex_unlock(&sock->mutex); 67762306a36Sopenharmony_ci return err; 67862306a36Sopenharmony_ci} 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ciint drbd_send_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock, 68162306a36Sopenharmony_ci enum drbd_packet cmd, unsigned int header_size, 68262306a36Sopenharmony_ci void *data, unsigned int size) 68362306a36Sopenharmony_ci{ 68462306a36Sopenharmony_ci int err; 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci err = __send_command(peer_device->connection, peer_device->device->vnr, 68762306a36Sopenharmony_ci sock, cmd, header_size, data, size); 68862306a36Sopenharmony_ci mutex_unlock(&sock->mutex); 68962306a36Sopenharmony_ci return err; 69062306a36Sopenharmony_ci} 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ciint drbd_send_ping(struct drbd_connection *connection) 69362306a36Sopenharmony_ci{ 69462306a36Sopenharmony_ci struct drbd_socket *sock; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci sock = &connection->meta; 69762306a36Sopenharmony_ci if (!conn_prepare_command(connection, sock)) 69862306a36Sopenharmony_ci return -EIO; 69962306a36Sopenharmony_ci return conn_send_command(connection, sock, P_PING, 0, NULL, 0); 70062306a36Sopenharmony_ci} 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ciint drbd_send_ping_ack(struct drbd_connection *connection) 70362306a36Sopenharmony_ci{ 70462306a36Sopenharmony_ci struct drbd_socket *sock; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci sock = &connection->meta; 70762306a36Sopenharmony_ci if (!conn_prepare_command(connection, sock)) 70862306a36Sopenharmony_ci return -EIO; 70962306a36Sopenharmony_ci return conn_send_command(connection, sock, P_PING_ACK, 0, NULL, 0); 71062306a36Sopenharmony_ci} 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ciint drbd_send_sync_param(struct drbd_peer_device *peer_device) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci struct drbd_socket *sock; 71562306a36Sopenharmony_ci struct p_rs_param_95 *p; 71662306a36Sopenharmony_ci int size; 71762306a36Sopenharmony_ci const int apv = peer_device->connection->agreed_pro_version; 71862306a36Sopenharmony_ci enum drbd_packet cmd; 71962306a36Sopenharmony_ci struct net_conf *nc; 72062306a36Sopenharmony_ci struct disk_conf *dc; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci sock = &peer_device->connection->data; 72362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 72462306a36Sopenharmony_ci if (!p) 72562306a36Sopenharmony_ci return -EIO; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci rcu_read_lock(); 72862306a36Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci size = apv <= 87 ? sizeof(struct p_rs_param) 73162306a36Sopenharmony_ci : apv == 88 ? sizeof(struct p_rs_param) 73262306a36Sopenharmony_ci + strlen(nc->verify_alg) + 1 73362306a36Sopenharmony_ci : apv <= 94 ? sizeof(struct p_rs_param_89) 73462306a36Sopenharmony_ci : /* apv >= 95 */ sizeof(struct p_rs_param_95); 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci /* initialize verify_alg and csums_alg */ 73962306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX); 74062306a36Sopenharmony_ci memset(&p->algs, 0, sizeof(p->algs)); 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci if (get_ldev(peer_device->device)) { 74362306a36Sopenharmony_ci dc = rcu_dereference(peer_device->device->ldev->disk_conf); 74462306a36Sopenharmony_ci p->resync_rate = cpu_to_be32(dc->resync_rate); 74562306a36Sopenharmony_ci p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); 74662306a36Sopenharmony_ci p->c_delay_target = cpu_to_be32(dc->c_delay_target); 74762306a36Sopenharmony_ci p->c_fill_target = cpu_to_be32(dc->c_fill_target); 74862306a36Sopenharmony_ci p->c_max_rate = cpu_to_be32(dc->c_max_rate); 74962306a36Sopenharmony_ci put_ldev(peer_device->device); 75062306a36Sopenharmony_ci } else { 75162306a36Sopenharmony_ci p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); 75262306a36Sopenharmony_ci p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); 75362306a36Sopenharmony_ci p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); 75462306a36Sopenharmony_ci p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); 75562306a36Sopenharmony_ci p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); 75662306a36Sopenharmony_ci } 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci if (apv >= 88) 75962306a36Sopenharmony_ci strcpy(p->verify_alg, nc->verify_alg); 76062306a36Sopenharmony_ci if (apv >= 89) 76162306a36Sopenharmony_ci strcpy(p->csums_alg, nc->csums_alg); 76262306a36Sopenharmony_ci rcu_read_unlock(); 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, cmd, size, NULL, 0); 76562306a36Sopenharmony_ci} 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ciint __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd) 76862306a36Sopenharmony_ci{ 76962306a36Sopenharmony_ci struct drbd_socket *sock; 77062306a36Sopenharmony_ci struct p_protocol *p; 77162306a36Sopenharmony_ci struct net_conf *nc; 77262306a36Sopenharmony_ci int size, cf; 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci sock = &connection->data; 77562306a36Sopenharmony_ci p = __conn_prepare_command(connection, sock); 77662306a36Sopenharmony_ci if (!p) 77762306a36Sopenharmony_ci return -EIO; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci rcu_read_lock(); 78062306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci if (nc->tentative && connection->agreed_pro_version < 92) { 78362306a36Sopenharmony_ci rcu_read_unlock(); 78462306a36Sopenharmony_ci drbd_err(connection, "--dry-run is not supported by peer"); 78562306a36Sopenharmony_ci return -EOPNOTSUPP; 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci size = sizeof(*p); 78962306a36Sopenharmony_ci if (connection->agreed_pro_version >= 87) 79062306a36Sopenharmony_ci size += strlen(nc->integrity_alg) + 1; 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci p->protocol = cpu_to_be32(nc->wire_protocol); 79362306a36Sopenharmony_ci p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); 79462306a36Sopenharmony_ci p->after_sb_1p = cpu_to_be32(nc->after_sb_1p); 79562306a36Sopenharmony_ci p->after_sb_2p = cpu_to_be32(nc->after_sb_2p); 79662306a36Sopenharmony_ci p->two_primaries = cpu_to_be32(nc->two_primaries); 79762306a36Sopenharmony_ci cf = 0; 79862306a36Sopenharmony_ci if (nc->discard_my_data) 79962306a36Sopenharmony_ci cf |= CF_DISCARD_MY_DATA; 80062306a36Sopenharmony_ci if (nc->tentative) 80162306a36Sopenharmony_ci cf |= CF_DRY_RUN; 80262306a36Sopenharmony_ci p->conn_flags = cpu_to_be32(cf); 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci if (connection->agreed_pro_version >= 87) 80562306a36Sopenharmony_ci strcpy(p->integrity_alg, nc->integrity_alg); 80662306a36Sopenharmony_ci rcu_read_unlock(); 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci return __conn_send_command(connection, sock, cmd, size, NULL, 0); 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ciint drbd_send_protocol(struct drbd_connection *connection) 81262306a36Sopenharmony_ci{ 81362306a36Sopenharmony_ci int err; 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci mutex_lock(&connection->data.mutex); 81662306a36Sopenharmony_ci err = __drbd_send_protocol(connection, P_PROTOCOL); 81762306a36Sopenharmony_ci mutex_unlock(&connection->data.mutex); 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci return err; 82062306a36Sopenharmony_ci} 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_cistatic int _drbd_send_uuids(struct drbd_peer_device *peer_device, u64 uuid_flags) 82362306a36Sopenharmony_ci{ 82462306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 82562306a36Sopenharmony_ci struct drbd_socket *sock; 82662306a36Sopenharmony_ci struct p_uuids *p; 82762306a36Sopenharmony_ci int i; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci if (!get_ldev_if_state(device, D_NEGOTIATING)) 83062306a36Sopenharmony_ci return 0; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci sock = &peer_device->connection->data; 83362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 83462306a36Sopenharmony_ci if (!p) { 83562306a36Sopenharmony_ci put_ldev(device); 83662306a36Sopenharmony_ci return -EIO; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci spin_lock_irq(&device->ldev->md.uuid_lock); 83962306a36Sopenharmony_ci for (i = UI_CURRENT; i < UI_SIZE; i++) 84062306a36Sopenharmony_ci p->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]); 84162306a36Sopenharmony_ci spin_unlock_irq(&device->ldev->md.uuid_lock); 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci device->comm_bm_set = drbd_bm_total_weight(device); 84462306a36Sopenharmony_ci p->uuid[UI_SIZE] = cpu_to_be64(device->comm_bm_set); 84562306a36Sopenharmony_ci rcu_read_lock(); 84662306a36Sopenharmony_ci uuid_flags |= rcu_dereference(peer_device->connection->net_conf)->discard_my_data ? 1 : 0; 84762306a36Sopenharmony_ci rcu_read_unlock(); 84862306a36Sopenharmony_ci uuid_flags |= test_bit(CRASHED_PRIMARY, &device->flags) ? 2 : 0; 84962306a36Sopenharmony_ci uuid_flags |= device->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; 85062306a36Sopenharmony_ci p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci put_ldev(device); 85362306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_UUIDS, sizeof(*p), NULL, 0); 85462306a36Sopenharmony_ci} 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ciint drbd_send_uuids(struct drbd_peer_device *peer_device) 85762306a36Sopenharmony_ci{ 85862306a36Sopenharmony_ci return _drbd_send_uuids(peer_device, 0); 85962306a36Sopenharmony_ci} 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ciint drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *peer_device) 86262306a36Sopenharmony_ci{ 86362306a36Sopenharmony_ci return _drbd_send_uuids(peer_device, 8); 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_civoid drbd_print_uuids(struct drbd_device *device, const char *text) 86762306a36Sopenharmony_ci{ 86862306a36Sopenharmony_ci if (get_ldev_if_state(device, D_NEGOTIATING)) { 86962306a36Sopenharmony_ci u64 *uuid = device->ldev->md.uuid; 87062306a36Sopenharmony_ci drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX\n", 87162306a36Sopenharmony_ci text, 87262306a36Sopenharmony_ci (unsigned long long)uuid[UI_CURRENT], 87362306a36Sopenharmony_ci (unsigned long long)uuid[UI_BITMAP], 87462306a36Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_START], 87562306a36Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_END]); 87662306a36Sopenharmony_ci put_ldev(device); 87762306a36Sopenharmony_ci } else { 87862306a36Sopenharmony_ci drbd_info(device, "%s effective data uuid: %016llX\n", 87962306a36Sopenharmony_ci text, 88062306a36Sopenharmony_ci (unsigned long long)device->ed_uuid); 88162306a36Sopenharmony_ci } 88262306a36Sopenharmony_ci} 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_civoid drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) 88562306a36Sopenharmony_ci{ 88662306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 88762306a36Sopenharmony_ci struct drbd_socket *sock; 88862306a36Sopenharmony_ci struct p_rs_uuid *p; 88962306a36Sopenharmony_ci u64 uuid; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci D_ASSERT(device, device->state.disk == D_UP_TO_DATE); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci uuid = device->ldev->md.uuid[UI_BITMAP]; 89462306a36Sopenharmony_ci if (uuid && uuid != UUID_JUST_CREATED) 89562306a36Sopenharmony_ci uuid = uuid + UUID_NEW_BM_OFFSET; 89662306a36Sopenharmony_ci else 89762306a36Sopenharmony_ci get_random_bytes(&uuid, sizeof(u64)); 89862306a36Sopenharmony_ci drbd_uuid_set(device, UI_BITMAP, uuid); 89962306a36Sopenharmony_ci drbd_print_uuids(device, "updated sync UUID"); 90062306a36Sopenharmony_ci drbd_md_sync(device); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci sock = &peer_device->connection->data; 90362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 90462306a36Sopenharmony_ci if (p) { 90562306a36Sopenharmony_ci p->uuid = cpu_to_be64(uuid); 90662306a36Sopenharmony_ci drbd_send_command(peer_device, sock, P_SYNC_UUID, sizeof(*p), NULL, 0); 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ciint drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags) 91162306a36Sopenharmony_ci{ 91262306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 91362306a36Sopenharmony_ci struct drbd_socket *sock; 91462306a36Sopenharmony_ci struct p_sizes *p; 91562306a36Sopenharmony_ci sector_t d_size, u_size; 91662306a36Sopenharmony_ci int q_order_type; 91762306a36Sopenharmony_ci unsigned int max_bio_size; 91862306a36Sopenharmony_ci unsigned int packet_size; 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci sock = &peer_device->connection->data; 92162306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 92262306a36Sopenharmony_ci if (!p) 92362306a36Sopenharmony_ci return -EIO; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci packet_size = sizeof(*p); 92662306a36Sopenharmony_ci if (peer_device->connection->agreed_features & DRBD_FF_WSAME) 92762306a36Sopenharmony_ci packet_size += sizeof(p->qlim[0]); 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci memset(p, 0, packet_size); 93062306a36Sopenharmony_ci if (get_ldev_if_state(device, D_NEGOTIATING)) { 93162306a36Sopenharmony_ci struct block_device *bdev = device->ldev->backing_bdev; 93262306a36Sopenharmony_ci struct request_queue *q = bdev_get_queue(bdev); 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci d_size = drbd_get_max_capacity(device->ldev); 93562306a36Sopenharmony_ci rcu_read_lock(); 93662306a36Sopenharmony_ci u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; 93762306a36Sopenharmony_ci rcu_read_unlock(); 93862306a36Sopenharmony_ci q_order_type = drbd_queue_order_type(device); 93962306a36Sopenharmony_ci max_bio_size = queue_max_hw_sectors(q) << 9; 94062306a36Sopenharmony_ci max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); 94162306a36Sopenharmony_ci p->qlim->physical_block_size = 94262306a36Sopenharmony_ci cpu_to_be32(bdev_physical_block_size(bdev)); 94362306a36Sopenharmony_ci p->qlim->logical_block_size = 94462306a36Sopenharmony_ci cpu_to_be32(bdev_logical_block_size(bdev)); 94562306a36Sopenharmony_ci p->qlim->alignment_offset = 94662306a36Sopenharmony_ci cpu_to_be32(bdev_alignment_offset(bdev)); 94762306a36Sopenharmony_ci p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev)); 94862306a36Sopenharmony_ci p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev)); 94962306a36Sopenharmony_ci p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev); 95062306a36Sopenharmony_ci put_ldev(device); 95162306a36Sopenharmony_ci } else { 95262306a36Sopenharmony_ci struct request_queue *q = device->rq_queue; 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci p->qlim->physical_block_size = 95562306a36Sopenharmony_ci cpu_to_be32(queue_physical_block_size(q)); 95662306a36Sopenharmony_ci p->qlim->logical_block_size = 95762306a36Sopenharmony_ci cpu_to_be32(queue_logical_block_size(q)); 95862306a36Sopenharmony_ci p->qlim->alignment_offset = 0; 95962306a36Sopenharmony_ci p->qlim->io_min = cpu_to_be32(queue_io_min(q)); 96062306a36Sopenharmony_ci p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); 96162306a36Sopenharmony_ci p->qlim->discard_enabled = 0; 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci d_size = 0; 96462306a36Sopenharmony_ci u_size = 0; 96562306a36Sopenharmony_ci q_order_type = QUEUE_ORDERED_NONE; 96662306a36Sopenharmony_ci max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ 96762306a36Sopenharmony_ci } 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci if (peer_device->connection->agreed_pro_version <= 94) 97062306a36Sopenharmony_ci max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 97162306a36Sopenharmony_ci else if (peer_device->connection->agreed_pro_version < 100) 97262306a36Sopenharmony_ci max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE_P95); 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci p->d_size = cpu_to_be64(d_size); 97562306a36Sopenharmony_ci p->u_size = cpu_to_be64(u_size); 97662306a36Sopenharmony_ci if (trigger_reply) 97762306a36Sopenharmony_ci p->c_size = 0; 97862306a36Sopenharmony_ci else 97962306a36Sopenharmony_ci p->c_size = cpu_to_be64(get_capacity(device->vdisk)); 98062306a36Sopenharmony_ci p->max_bio_size = cpu_to_be32(max_bio_size); 98162306a36Sopenharmony_ci p->queue_order_type = cpu_to_be16(q_order_type); 98262306a36Sopenharmony_ci p->dds_flags = cpu_to_be16(flags); 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0); 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci/** 98862306a36Sopenharmony_ci * drbd_send_current_state() - Sends the drbd state to the peer 98962306a36Sopenharmony_ci * @peer_device: DRBD peer device. 99062306a36Sopenharmony_ci */ 99162306a36Sopenharmony_ciint drbd_send_current_state(struct drbd_peer_device *peer_device) 99262306a36Sopenharmony_ci{ 99362306a36Sopenharmony_ci struct drbd_socket *sock; 99462306a36Sopenharmony_ci struct p_state *p; 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci sock = &peer_device->connection->data; 99762306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 99862306a36Sopenharmony_ci if (!p) 99962306a36Sopenharmony_ci return -EIO; 100062306a36Sopenharmony_ci p->state = cpu_to_be32(peer_device->device->state.i); /* Within the send mutex */ 100162306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0); 100262306a36Sopenharmony_ci} 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci/** 100562306a36Sopenharmony_ci * drbd_send_state() - After a state change, sends the new state to the peer 100662306a36Sopenharmony_ci * @peer_device: DRBD peer device. 100762306a36Sopenharmony_ci * @state: the state to send, not necessarily the current state. 100862306a36Sopenharmony_ci * 100962306a36Sopenharmony_ci * Each state change queues an "after_state_ch" work, which will eventually 101062306a36Sopenharmony_ci * send the resulting new state to the peer. If more state changes happen 101162306a36Sopenharmony_ci * between queuing and processing of the after_state_ch work, we still 101262306a36Sopenharmony_ci * want to send each intermediary state in the order it occurred. 101362306a36Sopenharmony_ci */ 101462306a36Sopenharmony_ciint drbd_send_state(struct drbd_peer_device *peer_device, union drbd_state state) 101562306a36Sopenharmony_ci{ 101662306a36Sopenharmony_ci struct drbd_socket *sock; 101762306a36Sopenharmony_ci struct p_state *p; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci sock = &peer_device->connection->data; 102062306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 102162306a36Sopenharmony_ci if (!p) 102262306a36Sopenharmony_ci return -EIO; 102362306a36Sopenharmony_ci p->state = cpu_to_be32(state.i); /* Within the send mutex */ 102462306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0); 102562306a36Sopenharmony_ci} 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ciint drbd_send_state_req(struct drbd_peer_device *peer_device, union drbd_state mask, union drbd_state val) 102862306a36Sopenharmony_ci{ 102962306a36Sopenharmony_ci struct drbd_socket *sock; 103062306a36Sopenharmony_ci struct p_req_state *p; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci sock = &peer_device->connection->data; 103362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 103462306a36Sopenharmony_ci if (!p) 103562306a36Sopenharmony_ci return -EIO; 103662306a36Sopenharmony_ci p->mask = cpu_to_be32(mask.i); 103762306a36Sopenharmony_ci p->val = cpu_to_be32(val.i); 103862306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); 103962306a36Sopenharmony_ci} 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ciint conn_send_state_req(struct drbd_connection *connection, union drbd_state mask, union drbd_state val) 104262306a36Sopenharmony_ci{ 104362306a36Sopenharmony_ci enum drbd_packet cmd; 104462306a36Sopenharmony_ci struct drbd_socket *sock; 104562306a36Sopenharmony_ci struct p_req_state *p; 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci cmd = connection->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; 104862306a36Sopenharmony_ci sock = &connection->data; 104962306a36Sopenharmony_ci p = conn_prepare_command(connection, sock); 105062306a36Sopenharmony_ci if (!p) 105162306a36Sopenharmony_ci return -EIO; 105262306a36Sopenharmony_ci p->mask = cpu_to_be32(mask.i); 105362306a36Sopenharmony_ci p->val = cpu_to_be32(val.i); 105462306a36Sopenharmony_ci return conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0); 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_civoid drbd_send_sr_reply(struct drbd_peer_device *peer_device, enum drbd_state_rv retcode) 105862306a36Sopenharmony_ci{ 105962306a36Sopenharmony_ci struct drbd_socket *sock; 106062306a36Sopenharmony_ci struct p_req_state_reply *p; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci sock = &peer_device->connection->meta; 106362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 106462306a36Sopenharmony_ci if (p) { 106562306a36Sopenharmony_ci p->retcode = cpu_to_be32(retcode); 106662306a36Sopenharmony_ci drbd_send_command(peer_device, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0); 106762306a36Sopenharmony_ci } 106862306a36Sopenharmony_ci} 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_civoid conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode) 107162306a36Sopenharmony_ci{ 107262306a36Sopenharmony_ci struct drbd_socket *sock; 107362306a36Sopenharmony_ci struct p_req_state_reply *p; 107462306a36Sopenharmony_ci enum drbd_packet cmd = connection->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_ci sock = &connection->meta; 107762306a36Sopenharmony_ci p = conn_prepare_command(connection, sock); 107862306a36Sopenharmony_ci if (p) { 107962306a36Sopenharmony_ci p->retcode = cpu_to_be32(retcode); 108062306a36Sopenharmony_ci conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0); 108162306a36Sopenharmony_ci } 108262306a36Sopenharmony_ci} 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_cistatic void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) 108562306a36Sopenharmony_ci{ 108662306a36Sopenharmony_ci BUG_ON(code & ~0xf); 108762306a36Sopenharmony_ci p->encoding = (p->encoding & ~0xf) | code; 108862306a36Sopenharmony_ci} 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_cistatic void dcbp_set_start(struct p_compressed_bm *p, int set) 109162306a36Sopenharmony_ci{ 109262306a36Sopenharmony_ci p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); 109362306a36Sopenharmony_ci} 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_cistatic void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) 109662306a36Sopenharmony_ci{ 109762306a36Sopenharmony_ci BUG_ON(n & ~0x7); 109862306a36Sopenharmony_ci p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); 109962306a36Sopenharmony_ci} 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_cistatic int fill_bitmap_rle_bits(struct drbd_device *device, 110262306a36Sopenharmony_ci struct p_compressed_bm *p, 110362306a36Sopenharmony_ci unsigned int size, 110462306a36Sopenharmony_ci struct bm_xfer_ctx *c) 110562306a36Sopenharmony_ci{ 110662306a36Sopenharmony_ci struct bitstream bs; 110762306a36Sopenharmony_ci unsigned long plain_bits; 110862306a36Sopenharmony_ci unsigned long tmp; 110962306a36Sopenharmony_ci unsigned long rl; 111062306a36Sopenharmony_ci unsigned len; 111162306a36Sopenharmony_ci unsigned toggle; 111262306a36Sopenharmony_ci int bits, use_rle; 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci /* may we use this feature? */ 111562306a36Sopenharmony_ci rcu_read_lock(); 111662306a36Sopenharmony_ci use_rle = rcu_dereference(first_peer_device(device)->connection->net_conf)->use_rle; 111762306a36Sopenharmony_ci rcu_read_unlock(); 111862306a36Sopenharmony_ci if (!use_rle || first_peer_device(device)->connection->agreed_pro_version < 90) 111962306a36Sopenharmony_ci return 0; 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_ci if (c->bit_offset >= c->bm_bits) 112262306a36Sopenharmony_ci return 0; /* nothing to do. */ 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci /* use at most thus many bytes */ 112562306a36Sopenharmony_ci bitstream_init(&bs, p->code, size, 0); 112662306a36Sopenharmony_ci memset(p->code, 0, size); 112762306a36Sopenharmony_ci /* plain bits covered in this code string */ 112862306a36Sopenharmony_ci plain_bits = 0; 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci /* p->encoding & 0x80 stores whether the first run length is set. 113162306a36Sopenharmony_ci * bit offset is implicit. 113262306a36Sopenharmony_ci * start with toggle == 2 to be able to tell the first iteration */ 113362306a36Sopenharmony_ci toggle = 2; 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci /* see how much plain bits we can stuff into one packet 113662306a36Sopenharmony_ci * using RLE and VLI. */ 113762306a36Sopenharmony_ci do { 113862306a36Sopenharmony_ci tmp = (toggle == 0) ? _drbd_bm_find_next_zero(device, c->bit_offset) 113962306a36Sopenharmony_ci : _drbd_bm_find_next(device, c->bit_offset); 114062306a36Sopenharmony_ci if (tmp == -1UL) 114162306a36Sopenharmony_ci tmp = c->bm_bits; 114262306a36Sopenharmony_ci rl = tmp - c->bit_offset; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci if (toggle == 2) { /* first iteration */ 114562306a36Sopenharmony_ci if (rl == 0) { 114662306a36Sopenharmony_ci /* the first checked bit was set, 114762306a36Sopenharmony_ci * store start value, */ 114862306a36Sopenharmony_ci dcbp_set_start(p, 1); 114962306a36Sopenharmony_ci /* but skip encoding of zero run length */ 115062306a36Sopenharmony_ci toggle = !toggle; 115162306a36Sopenharmony_ci continue; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci dcbp_set_start(p, 0); 115462306a36Sopenharmony_ci } 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci /* paranoia: catch zero runlength. 115762306a36Sopenharmony_ci * can only happen if bitmap is modified while we scan it. */ 115862306a36Sopenharmony_ci if (rl == 0) { 115962306a36Sopenharmony_ci drbd_err(device, "unexpected zero runlength while encoding bitmap " 116062306a36Sopenharmony_ci "t:%u bo:%lu\n", toggle, c->bit_offset); 116162306a36Sopenharmony_ci return -1; 116262306a36Sopenharmony_ci } 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci bits = vli_encode_bits(&bs, rl); 116562306a36Sopenharmony_ci if (bits == -ENOBUFS) /* buffer full */ 116662306a36Sopenharmony_ci break; 116762306a36Sopenharmony_ci if (bits <= 0) { 116862306a36Sopenharmony_ci drbd_err(device, "error while encoding bitmap: %d\n", bits); 116962306a36Sopenharmony_ci return 0; 117062306a36Sopenharmony_ci } 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci toggle = !toggle; 117362306a36Sopenharmony_ci plain_bits += rl; 117462306a36Sopenharmony_ci c->bit_offset = tmp; 117562306a36Sopenharmony_ci } while (c->bit_offset < c->bm_bits); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci len = bs.cur.b - p->code + !!bs.cur.bit; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci if (plain_bits < (len << 3)) { 118062306a36Sopenharmony_ci /* incompressible with this method. 118162306a36Sopenharmony_ci * we need to rewind both word and bit position. */ 118262306a36Sopenharmony_ci c->bit_offset -= plain_bits; 118362306a36Sopenharmony_ci bm_xfer_ctx_bit_to_word_offset(c); 118462306a36Sopenharmony_ci c->bit_offset = c->word_offset * BITS_PER_LONG; 118562306a36Sopenharmony_ci return 0; 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci /* RLE + VLI was able to compress it just fine. 118962306a36Sopenharmony_ci * update c->word_offset. */ 119062306a36Sopenharmony_ci bm_xfer_ctx_bit_to_word_offset(c); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci /* store pad_bits */ 119362306a36Sopenharmony_ci dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); 119462306a36Sopenharmony_ci 119562306a36Sopenharmony_ci return len; 119662306a36Sopenharmony_ci} 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci/* 119962306a36Sopenharmony_ci * send_bitmap_rle_or_plain 120062306a36Sopenharmony_ci * 120162306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 120262306a36Sopenharmony_ci * code upon failure. 120362306a36Sopenharmony_ci */ 120462306a36Sopenharmony_cistatic int 120562306a36Sopenharmony_cisend_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c) 120662306a36Sopenharmony_ci{ 120762306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 120862306a36Sopenharmony_ci struct drbd_socket *sock = &peer_device->connection->data; 120962306a36Sopenharmony_ci unsigned int header_size = drbd_header_size(peer_device->connection); 121062306a36Sopenharmony_ci struct p_compressed_bm *p = sock->sbuf + header_size; 121162306a36Sopenharmony_ci int len, err; 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci len = fill_bitmap_rle_bits(device, p, 121462306a36Sopenharmony_ci DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c); 121562306a36Sopenharmony_ci if (len < 0) 121662306a36Sopenharmony_ci return -EIO; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci if (len) { 121962306a36Sopenharmony_ci dcbp_set_code(p, RLE_VLI_Bits); 122062306a36Sopenharmony_ci err = __send_command(peer_device->connection, device->vnr, sock, 122162306a36Sopenharmony_ci P_COMPRESSED_BITMAP, sizeof(*p) + len, 122262306a36Sopenharmony_ci NULL, 0); 122362306a36Sopenharmony_ci c->packets[0]++; 122462306a36Sopenharmony_ci c->bytes[0] += header_size + sizeof(*p) + len; 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci if (c->bit_offset >= c->bm_bits) 122762306a36Sopenharmony_ci len = 0; /* DONE */ 122862306a36Sopenharmony_ci } else { 122962306a36Sopenharmony_ci /* was not compressible. 123062306a36Sopenharmony_ci * send a buffer full of plain text bits instead. */ 123162306a36Sopenharmony_ci unsigned int data_size; 123262306a36Sopenharmony_ci unsigned long num_words; 123362306a36Sopenharmony_ci unsigned long *p = sock->sbuf + header_size; 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; 123662306a36Sopenharmony_ci num_words = min_t(size_t, data_size / sizeof(*p), 123762306a36Sopenharmony_ci c->bm_words - c->word_offset); 123862306a36Sopenharmony_ci len = num_words * sizeof(*p); 123962306a36Sopenharmony_ci if (len) 124062306a36Sopenharmony_ci drbd_bm_get_lel(device, c->word_offset, num_words, p); 124162306a36Sopenharmony_ci err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP, 124262306a36Sopenharmony_ci len, NULL, 0); 124362306a36Sopenharmony_ci c->word_offset += num_words; 124462306a36Sopenharmony_ci c->bit_offset = c->word_offset * BITS_PER_LONG; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci c->packets[1]++; 124762306a36Sopenharmony_ci c->bytes[1] += header_size + len; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci if (c->bit_offset > c->bm_bits) 125062306a36Sopenharmony_ci c->bit_offset = c->bm_bits; 125162306a36Sopenharmony_ci } 125262306a36Sopenharmony_ci if (!err) { 125362306a36Sopenharmony_ci if (len == 0) { 125462306a36Sopenharmony_ci INFO_bm_xfer_stats(peer_device, "send", c); 125562306a36Sopenharmony_ci return 0; 125662306a36Sopenharmony_ci } else 125762306a36Sopenharmony_ci return 1; 125862306a36Sopenharmony_ci } 125962306a36Sopenharmony_ci return -EIO; 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci/* See the comment at receive_bitmap() */ 126362306a36Sopenharmony_cistatic int _drbd_send_bitmap(struct drbd_device *device, 126462306a36Sopenharmony_ci struct drbd_peer_device *peer_device) 126562306a36Sopenharmony_ci{ 126662306a36Sopenharmony_ci struct bm_xfer_ctx c; 126762306a36Sopenharmony_ci int err; 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci if (!expect(device, device->bitmap)) 127062306a36Sopenharmony_ci return false; 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci if (get_ldev(device)) { 127362306a36Sopenharmony_ci if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) { 127462306a36Sopenharmony_ci drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n"); 127562306a36Sopenharmony_ci drbd_bm_set_all(device); 127662306a36Sopenharmony_ci if (drbd_bm_write(device, peer_device)) { 127762306a36Sopenharmony_ci /* write_bm did fail! Leave full sync flag set in Meta P_DATA 127862306a36Sopenharmony_ci * but otherwise process as per normal - need to tell other 127962306a36Sopenharmony_ci * side that a full resync is required! */ 128062306a36Sopenharmony_ci drbd_err(device, "Failed to write bitmap to disk!\n"); 128162306a36Sopenharmony_ci } else { 128262306a36Sopenharmony_ci drbd_md_clear_flag(device, MDF_FULL_SYNC); 128362306a36Sopenharmony_ci drbd_md_sync(device); 128462306a36Sopenharmony_ci } 128562306a36Sopenharmony_ci } 128662306a36Sopenharmony_ci put_ldev(device); 128762306a36Sopenharmony_ci } 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci c = (struct bm_xfer_ctx) { 129062306a36Sopenharmony_ci .bm_bits = drbd_bm_bits(device), 129162306a36Sopenharmony_ci .bm_words = drbd_bm_words(device), 129262306a36Sopenharmony_ci }; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci do { 129562306a36Sopenharmony_ci err = send_bitmap_rle_or_plain(peer_device, &c); 129662306a36Sopenharmony_ci } while (err > 0); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci return err == 0; 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ciint drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci struct drbd_socket *sock = &peer_device->connection->data; 130462306a36Sopenharmony_ci int err = -1; 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci mutex_lock(&sock->mutex); 130762306a36Sopenharmony_ci if (sock->socket) 130862306a36Sopenharmony_ci err = !_drbd_send_bitmap(device, peer_device); 130962306a36Sopenharmony_ci mutex_unlock(&sock->mutex); 131062306a36Sopenharmony_ci return err; 131162306a36Sopenharmony_ci} 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_civoid drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr, u32 set_size) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci struct drbd_socket *sock; 131662306a36Sopenharmony_ci struct p_barrier_ack *p; 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_ci if (connection->cstate < C_WF_REPORT_PARAMS) 131962306a36Sopenharmony_ci return; 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci sock = &connection->meta; 132262306a36Sopenharmony_ci p = conn_prepare_command(connection, sock); 132362306a36Sopenharmony_ci if (!p) 132462306a36Sopenharmony_ci return; 132562306a36Sopenharmony_ci p->barrier = barrier_nr; 132662306a36Sopenharmony_ci p->set_size = cpu_to_be32(set_size); 132762306a36Sopenharmony_ci conn_send_command(connection, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); 132862306a36Sopenharmony_ci} 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci/** 133162306a36Sopenharmony_ci * _drbd_send_ack() - Sends an ack packet 133262306a36Sopenharmony_ci * @peer_device: DRBD peer device. 133362306a36Sopenharmony_ci * @cmd: Packet command code. 133462306a36Sopenharmony_ci * @sector: sector, needs to be in big endian byte order 133562306a36Sopenharmony_ci * @blksize: size in byte, needs to be in big endian byte order 133662306a36Sopenharmony_ci * @block_id: Id, big endian byte order 133762306a36Sopenharmony_ci */ 133862306a36Sopenharmony_cistatic int _drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 133962306a36Sopenharmony_ci u64 sector, u32 blksize, u64 block_id) 134062306a36Sopenharmony_ci{ 134162306a36Sopenharmony_ci struct drbd_socket *sock; 134262306a36Sopenharmony_ci struct p_block_ack *p; 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci if (peer_device->device->state.conn < C_CONNECTED) 134562306a36Sopenharmony_ci return -EIO; 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci sock = &peer_device->connection->meta; 134862306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 134962306a36Sopenharmony_ci if (!p) 135062306a36Sopenharmony_ci return -EIO; 135162306a36Sopenharmony_ci p->sector = sector; 135262306a36Sopenharmony_ci p->block_id = block_id; 135362306a36Sopenharmony_ci p->blksize = blksize; 135462306a36Sopenharmony_ci p->seq_num = cpu_to_be32(atomic_inc_return(&peer_device->device->packet_seq)); 135562306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0); 135662306a36Sopenharmony_ci} 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci/* dp->sector and dp->block_id already/still in network byte order, 135962306a36Sopenharmony_ci * data_size is payload size according to dp->head, 136062306a36Sopenharmony_ci * and may need to be corrected for digest size. */ 136162306a36Sopenharmony_civoid drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 136262306a36Sopenharmony_ci struct p_data *dp, int data_size) 136362306a36Sopenharmony_ci{ 136462306a36Sopenharmony_ci if (peer_device->connection->peer_integrity_tfm) 136562306a36Sopenharmony_ci data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); 136662306a36Sopenharmony_ci _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size), 136762306a36Sopenharmony_ci dp->block_id); 136862306a36Sopenharmony_ci} 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_civoid drbd_send_ack_rp(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 137162306a36Sopenharmony_ci struct p_block_req *rp) 137262306a36Sopenharmony_ci{ 137362306a36Sopenharmony_ci _drbd_send_ack(peer_device, cmd, rp->sector, rp->blksize, rp->block_id); 137462306a36Sopenharmony_ci} 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci/** 137762306a36Sopenharmony_ci * drbd_send_ack() - Sends an ack packet 137862306a36Sopenharmony_ci * @peer_device: DRBD peer device 137962306a36Sopenharmony_ci * @cmd: packet command code 138062306a36Sopenharmony_ci * @peer_req: peer request 138162306a36Sopenharmony_ci */ 138262306a36Sopenharmony_ciint drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 138362306a36Sopenharmony_ci struct drbd_peer_request *peer_req) 138462306a36Sopenharmony_ci{ 138562306a36Sopenharmony_ci return _drbd_send_ack(peer_device, cmd, 138662306a36Sopenharmony_ci cpu_to_be64(peer_req->i.sector), 138762306a36Sopenharmony_ci cpu_to_be32(peer_req->i.size), 138862306a36Sopenharmony_ci peer_req->block_id); 138962306a36Sopenharmony_ci} 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci/* This function misuses the block_id field to signal if the blocks 139262306a36Sopenharmony_ci * are is sync or not. */ 139362306a36Sopenharmony_ciint drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 139462306a36Sopenharmony_ci sector_t sector, int blksize, u64 block_id) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci return _drbd_send_ack(peer_device, cmd, 139762306a36Sopenharmony_ci cpu_to_be64(sector), 139862306a36Sopenharmony_ci cpu_to_be32(blksize), 139962306a36Sopenharmony_ci cpu_to_be64(block_id)); 140062306a36Sopenharmony_ci} 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ciint drbd_send_rs_deallocated(struct drbd_peer_device *peer_device, 140362306a36Sopenharmony_ci struct drbd_peer_request *peer_req) 140462306a36Sopenharmony_ci{ 140562306a36Sopenharmony_ci struct drbd_socket *sock; 140662306a36Sopenharmony_ci struct p_block_desc *p; 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci sock = &peer_device->connection->data; 140962306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 141062306a36Sopenharmony_ci if (!p) 141162306a36Sopenharmony_ci return -EIO; 141262306a36Sopenharmony_ci p->sector = cpu_to_be64(peer_req->i.sector); 141362306a36Sopenharmony_ci p->blksize = cpu_to_be32(peer_req->i.size); 141462306a36Sopenharmony_ci p->pad = 0; 141562306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, sizeof(*p), NULL, 0); 141662306a36Sopenharmony_ci} 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ciint drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd, 141962306a36Sopenharmony_ci sector_t sector, int size, u64 block_id) 142062306a36Sopenharmony_ci{ 142162306a36Sopenharmony_ci struct drbd_socket *sock; 142262306a36Sopenharmony_ci struct p_block_req *p; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci sock = &peer_device->connection->data; 142562306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 142662306a36Sopenharmony_ci if (!p) 142762306a36Sopenharmony_ci return -EIO; 142862306a36Sopenharmony_ci p->sector = cpu_to_be64(sector); 142962306a36Sopenharmony_ci p->block_id = block_id; 143062306a36Sopenharmony_ci p->blksize = cpu_to_be32(size); 143162306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0); 143262306a36Sopenharmony_ci} 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ciint drbd_send_drequest_csum(struct drbd_peer_device *peer_device, sector_t sector, int size, 143562306a36Sopenharmony_ci void *digest, int digest_size, enum drbd_packet cmd) 143662306a36Sopenharmony_ci{ 143762306a36Sopenharmony_ci struct drbd_socket *sock; 143862306a36Sopenharmony_ci struct p_block_req *p; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci /* FIXME: Put the digest into the preallocated socket buffer. */ 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci sock = &peer_device->connection->data; 144362306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 144462306a36Sopenharmony_ci if (!p) 144562306a36Sopenharmony_ci return -EIO; 144662306a36Sopenharmony_ci p->sector = cpu_to_be64(sector); 144762306a36Sopenharmony_ci p->block_id = ID_SYNCER /* unused */; 144862306a36Sopenharmony_ci p->blksize = cpu_to_be32(size); 144962306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, cmd, sizeof(*p), digest, digest_size); 145062306a36Sopenharmony_ci} 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ciint drbd_send_ov_request(struct drbd_peer_device *peer_device, sector_t sector, int size) 145362306a36Sopenharmony_ci{ 145462306a36Sopenharmony_ci struct drbd_socket *sock; 145562306a36Sopenharmony_ci struct p_block_req *p; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci sock = &peer_device->connection->data; 145862306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 145962306a36Sopenharmony_ci if (!p) 146062306a36Sopenharmony_ci return -EIO; 146162306a36Sopenharmony_ci p->sector = cpu_to_be64(sector); 146262306a36Sopenharmony_ci p->block_id = ID_SYNCER /* unused */; 146362306a36Sopenharmony_ci p->blksize = cpu_to_be32(size); 146462306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); 146562306a36Sopenharmony_ci} 146662306a36Sopenharmony_ci 146762306a36Sopenharmony_ci/* called on sndtimeo 146862306a36Sopenharmony_ci * returns false if we should retry, 146962306a36Sopenharmony_ci * true if we think connection is dead 147062306a36Sopenharmony_ci */ 147162306a36Sopenharmony_cistatic int we_should_drop_the_connection(struct drbd_connection *connection, struct socket *sock) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci int drop_it; 147462306a36Sopenharmony_ci /* long elapsed = (long)(jiffies - device->last_received); */ 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci drop_it = connection->meta.socket == sock 147762306a36Sopenharmony_ci || !connection->ack_receiver.task 147862306a36Sopenharmony_ci || get_t_state(&connection->ack_receiver) != RUNNING 147962306a36Sopenharmony_ci || connection->cstate < C_WF_REPORT_PARAMS; 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci if (drop_it) 148262306a36Sopenharmony_ci return true; 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci drop_it = !--connection->ko_count; 148562306a36Sopenharmony_ci if (!drop_it) { 148662306a36Sopenharmony_ci drbd_err(connection, "[%s/%d] sock_sendmsg time expired, ko = %u\n", 148762306a36Sopenharmony_ci current->comm, current->pid, connection->ko_count); 148862306a36Sopenharmony_ci request_ping(connection); 148962306a36Sopenharmony_ci } 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci return drop_it; /* && (device->state == R_PRIMARY) */; 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_cistatic void drbd_update_congested(struct drbd_connection *connection) 149562306a36Sopenharmony_ci{ 149662306a36Sopenharmony_ci struct sock *sk = connection->data.socket->sk; 149762306a36Sopenharmony_ci if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) 149862306a36Sopenharmony_ci set_bit(NET_CONGESTED, &connection->flags); 149962306a36Sopenharmony_ci} 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci/* The idea of sendpage seems to be to put some kind of reference 150262306a36Sopenharmony_ci * to the page into the skb, and to hand it over to the NIC. In 150362306a36Sopenharmony_ci * this process get_page() gets called. 150462306a36Sopenharmony_ci * 150562306a36Sopenharmony_ci * As soon as the page was really sent over the network put_page() 150662306a36Sopenharmony_ci * gets called by some part of the network layer. [ NIC driver? ] 150762306a36Sopenharmony_ci * 150862306a36Sopenharmony_ci * [ get_page() / put_page() increment/decrement the count. If count 150962306a36Sopenharmony_ci * reaches 0 the page will be freed. ] 151062306a36Sopenharmony_ci * 151162306a36Sopenharmony_ci * This works nicely with pages from FSs. 151262306a36Sopenharmony_ci * But this means that in protocol A we might signal IO completion too early! 151362306a36Sopenharmony_ci * 151462306a36Sopenharmony_ci * In order not to corrupt data during a resync we must make sure 151562306a36Sopenharmony_ci * that we do not reuse our own buffer pages (EEs) to early, therefore 151662306a36Sopenharmony_ci * we have the net_ee list. 151762306a36Sopenharmony_ci * 151862306a36Sopenharmony_ci * XFS seems to have problems, still, it submits pages with page_count == 0! 151962306a36Sopenharmony_ci * As a workaround, we disable sendpage on pages 152062306a36Sopenharmony_ci * with page_count == 0 or PageSlab. 152162306a36Sopenharmony_ci */ 152262306a36Sopenharmony_cistatic int _drbd_no_send_page(struct drbd_peer_device *peer_device, struct page *page, 152362306a36Sopenharmony_ci int offset, size_t size, unsigned msg_flags) 152462306a36Sopenharmony_ci{ 152562306a36Sopenharmony_ci struct socket *socket; 152662306a36Sopenharmony_ci void *addr; 152762306a36Sopenharmony_ci int err; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci socket = peer_device->connection->data.socket; 153062306a36Sopenharmony_ci addr = kmap(page) + offset; 153162306a36Sopenharmony_ci err = drbd_send_all(peer_device->connection, socket, addr, size, msg_flags); 153262306a36Sopenharmony_ci kunmap(page); 153362306a36Sopenharmony_ci if (!err) 153462306a36Sopenharmony_ci peer_device->device->send_cnt += size >> 9; 153562306a36Sopenharmony_ci return err; 153662306a36Sopenharmony_ci} 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_cistatic int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *page, 153962306a36Sopenharmony_ci int offset, size_t size, unsigned msg_flags) 154062306a36Sopenharmony_ci{ 154162306a36Sopenharmony_ci struct socket *socket = peer_device->connection->data.socket; 154262306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = msg_flags, }; 154362306a36Sopenharmony_ci struct bio_vec bvec; 154462306a36Sopenharmony_ci int len = size; 154562306a36Sopenharmony_ci int err = -EIO; 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci /* e.g. XFS meta- & log-data is in slab pages, which have a 154862306a36Sopenharmony_ci * page_count of 0 and/or have PageSlab() set. 154962306a36Sopenharmony_ci * we cannot use send_page for those, as that does get_page(); 155062306a36Sopenharmony_ci * put_page(); and would cause either a VM_BUG directly, or 155162306a36Sopenharmony_ci * __page_cache_release a page that would actually still be referenced 155262306a36Sopenharmony_ci * by someone, leading to some obscure delayed Oops somewhere else. */ 155362306a36Sopenharmony_ci if (!drbd_disable_sendpage && sendpage_ok(page)) 155462306a36Sopenharmony_ci msg.msg_flags |= MSG_NOSIGNAL | MSG_SPLICE_PAGES; 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci drbd_update_congested(peer_device->connection); 155762306a36Sopenharmony_ci do { 155862306a36Sopenharmony_ci int sent; 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci bvec_set_page(&bvec, page, len, offset); 156162306a36Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci sent = sock_sendmsg(socket, &msg); 156462306a36Sopenharmony_ci if (sent <= 0) { 156562306a36Sopenharmony_ci if (sent == -EAGAIN) { 156662306a36Sopenharmony_ci if (we_should_drop_the_connection(peer_device->connection, socket)) 156762306a36Sopenharmony_ci break; 156862306a36Sopenharmony_ci continue; 156962306a36Sopenharmony_ci } 157062306a36Sopenharmony_ci drbd_warn(peer_device->device, "%s: size=%d len=%d sent=%d\n", 157162306a36Sopenharmony_ci __func__, (int)size, len, sent); 157262306a36Sopenharmony_ci if (sent < 0) 157362306a36Sopenharmony_ci err = sent; 157462306a36Sopenharmony_ci break; 157562306a36Sopenharmony_ci } 157662306a36Sopenharmony_ci len -= sent; 157762306a36Sopenharmony_ci offset += sent; 157862306a36Sopenharmony_ci } while (len > 0 /* THINK && device->cstate >= C_CONNECTED*/); 157962306a36Sopenharmony_ci clear_bit(NET_CONGESTED, &peer_device->connection->flags); 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci if (len == 0) { 158262306a36Sopenharmony_ci err = 0; 158362306a36Sopenharmony_ci peer_device->device->send_cnt += size >> 9; 158462306a36Sopenharmony_ci } 158562306a36Sopenharmony_ci return err; 158662306a36Sopenharmony_ci} 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_cistatic int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio) 158962306a36Sopenharmony_ci{ 159062306a36Sopenharmony_ci struct bio_vec bvec; 159162306a36Sopenharmony_ci struct bvec_iter iter; 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci /* hint all but last page with MSG_MORE */ 159462306a36Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 159562306a36Sopenharmony_ci int err; 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci err = _drbd_no_send_page(peer_device, bvec.bv_page, 159862306a36Sopenharmony_ci bvec.bv_offset, bvec.bv_len, 159962306a36Sopenharmony_ci bio_iter_last(bvec, iter) 160062306a36Sopenharmony_ci ? 0 : MSG_MORE); 160162306a36Sopenharmony_ci if (err) 160262306a36Sopenharmony_ci return err; 160362306a36Sopenharmony_ci } 160462306a36Sopenharmony_ci return 0; 160562306a36Sopenharmony_ci} 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_cistatic int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *bio) 160862306a36Sopenharmony_ci{ 160962306a36Sopenharmony_ci struct bio_vec bvec; 161062306a36Sopenharmony_ci struct bvec_iter iter; 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci /* hint all but last page with MSG_MORE */ 161362306a36Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 161462306a36Sopenharmony_ci int err; 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci err = _drbd_send_page(peer_device, bvec.bv_page, 161762306a36Sopenharmony_ci bvec.bv_offset, bvec.bv_len, 161862306a36Sopenharmony_ci bio_iter_last(bvec, iter) ? 0 : MSG_MORE); 161962306a36Sopenharmony_ci if (err) 162062306a36Sopenharmony_ci return err; 162162306a36Sopenharmony_ci } 162262306a36Sopenharmony_ci return 0; 162362306a36Sopenharmony_ci} 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_cistatic int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, 162662306a36Sopenharmony_ci struct drbd_peer_request *peer_req) 162762306a36Sopenharmony_ci{ 162862306a36Sopenharmony_ci struct page *page = peer_req->pages; 162962306a36Sopenharmony_ci unsigned len = peer_req->i.size; 163062306a36Sopenharmony_ci int err; 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci /* hint all but last page with MSG_MORE */ 163362306a36Sopenharmony_ci page_chain_for_each(page) { 163462306a36Sopenharmony_ci unsigned l = min_t(unsigned, len, PAGE_SIZE); 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci err = _drbd_send_page(peer_device, page, 0, l, 163762306a36Sopenharmony_ci page_chain_next(page) ? MSG_MORE : 0); 163862306a36Sopenharmony_ci if (err) 163962306a36Sopenharmony_ci return err; 164062306a36Sopenharmony_ci len -= l; 164162306a36Sopenharmony_ci } 164262306a36Sopenharmony_ci return 0; 164362306a36Sopenharmony_ci} 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_cistatic u32 bio_flags_to_wire(struct drbd_connection *connection, 164662306a36Sopenharmony_ci struct bio *bio) 164762306a36Sopenharmony_ci{ 164862306a36Sopenharmony_ci if (connection->agreed_pro_version >= 95) 164962306a36Sopenharmony_ci return (bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0) | 165062306a36Sopenharmony_ci (bio->bi_opf & REQ_FUA ? DP_FUA : 0) | 165162306a36Sopenharmony_ci (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | 165262306a36Sopenharmony_ci (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | 165362306a36Sopenharmony_ci (bio_op(bio) == REQ_OP_WRITE_ZEROES ? 165462306a36Sopenharmony_ci ((connection->agreed_features & DRBD_FF_WZEROES) ? 165562306a36Sopenharmony_ci (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0)) 165662306a36Sopenharmony_ci : DP_DISCARD) 165762306a36Sopenharmony_ci : 0); 165862306a36Sopenharmony_ci else 165962306a36Sopenharmony_ci return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; 166062306a36Sopenharmony_ci} 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci/* Used to send write or TRIM aka REQ_OP_DISCARD requests 166362306a36Sopenharmony_ci * R_PRIMARY -> Peer (P_DATA, P_TRIM) 166462306a36Sopenharmony_ci */ 166562306a36Sopenharmony_ciint drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) 166662306a36Sopenharmony_ci{ 166762306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 166862306a36Sopenharmony_ci struct drbd_socket *sock; 166962306a36Sopenharmony_ci struct p_data *p; 167062306a36Sopenharmony_ci void *digest_out; 167162306a36Sopenharmony_ci unsigned int dp_flags = 0; 167262306a36Sopenharmony_ci int digest_size; 167362306a36Sopenharmony_ci int err; 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci sock = &peer_device->connection->data; 167662306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 167762306a36Sopenharmony_ci digest_size = peer_device->connection->integrity_tfm ? 167862306a36Sopenharmony_ci crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci if (!p) 168162306a36Sopenharmony_ci return -EIO; 168262306a36Sopenharmony_ci p->sector = cpu_to_be64(req->i.sector); 168362306a36Sopenharmony_ci p->block_id = (unsigned long)req; 168462306a36Sopenharmony_ci p->seq_num = cpu_to_be32(atomic_inc_return(&device->packet_seq)); 168562306a36Sopenharmony_ci dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio); 168662306a36Sopenharmony_ci if (device->state.conn >= C_SYNC_SOURCE && 168762306a36Sopenharmony_ci device->state.conn <= C_PAUSED_SYNC_T) 168862306a36Sopenharmony_ci dp_flags |= DP_MAY_SET_IN_SYNC; 168962306a36Sopenharmony_ci if (peer_device->connection->agreed_pro_version >= 100) { 169062306a36Sopenharmony_ci if (req->rq_state & RQ_EXP_RECEIVE_ACK) 169162306a36Sopenharmony_ci dp_flags |= DP_SEND_RECEIVE_ACK; 169262306a36Sopenharmony_ci /* During resync, request an explicit write ack, 169362306a36Sopenharmony_ci * even in protocol != C */ 169462306a36Sopenharmony_ci if (req->rq_state & RQ_EXP_WRITE_ACK 169562306a36Sopenharmony_ci || (dp_flags & DP_MAY_SET_IN_SYNC)) 169662306a36Sopenharmony_ci dp_flags |= DP_SEND_WRITE_ACK; 169762306a36Sopenharmony_ci } 169862306a36Sopenharmony_ci p->dp_flags = cpu_to_be32(dp_flags); 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_ci if (dp_flags & (DP_DISCARD|DP_ZEROES)) { 170162306a36Sopenharmony_ci enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM; 170262306a36Sopenharmony_ci struct p_trim *t = (struct p_trim*)p; 170362306a36Sopenharmony_ci t->size = cpu_to_be32(req->i.size); 170462306a36Sopenharmony_ci err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0); 170562306a36Sopenharmony_ci goto out; 170662306a36Sopenharmony_ci } 170762306a36Sopenharmony_ci digest_out = p + 1; 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci /* our digest is still only over the payload. 171062306a36Sopenharmony_ci * TRIM does not carry any payload. */ 171162306a36Sopenharmony_ci if (digest_size) 171262306a36Sopenharmony_ci drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out); 171362306a36Sopenharmony_ci err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, 171462306a36Sopenharmony_ci sizeof(*p) + digest_size, NULL, req->i.size); 171562306a36Sopenharmony_ci if (!err) { 171662306a36Sopenharmony_ci /* For protocol A, we have to memcpy the payload into 171762306a36Sopenharmony_ci * socket buffers, as we may complete right away 171862306a36Sopenharmony_ci * as soon as we handed it over to tcp, at which point the data 171962306a36Sopenharmony_ci * pages may become invalid. 172062306a36Sopenharmony_ci * 172162306a36Sopenharmony_ci * For data-integrity enabled, we copy it as well, so we can be 172262306a36Sopenharmony_ci * sure that even if the bio pages may still be modified, it 172362306a36Sopenharmony_ci * won't change the data on the wire, thus if the digest checks 172462306a36Sopenharmony_ci * out ok after sending on this side, but does not fit on the 172562306a36Sopenharmony_ci * receiving side, we sure have detected corruption elsewhere. 172662306a36Sopenharmony_ci */ 172762306a36Sopenharmony_ci if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || digest_size) 172862306a36Sopenharmony_ci err = _drbd_send_bio(peer_device, req->master_bio); 172962306a36Sopenharmony_ci else 173062306a36Sopenharmony_ci err = _drbd_send_zc_bio(peer_device, req->master_bio); 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci /* double check digest, sometimes buffers have been modified in flight. */ 173362306a36Sopenharmony_ci if (digest_size > 0 && digest_size <= 64) { 173462306a36Sopenharmony_ci /* 64 byte, 512 bit, is the largest digest size 173562306a36Sopenharmony_ci * currently supported in kernel crypto. */ 173662306a36Sopenharmony_ci unsigned char digest[64]; 173762306a36Sopenharmony_ci drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest); 173862306a36Sopenharmony_ci if (memcmp(p + 1, digest, digest_size)) { 173962306a36Sopenharmony_ci drbd_warn(device, 174062306a36Sopenharmony_ci "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", 174162306a36Sopenharmony_ci (unsigned long long)req->i.sector, req->i.size); 174262306a36Sopenharmony_ci } 174362306a36Sopenharmony_ci } /* else if (digest_size > 64) { 174462306a36Sopenharmony_ci ... Be noisy about digest too large ... 174562306a36Sopenharmony_ci } */ 174662306a36Sopenharmony_ci } 174762306a36Sopenharmony_ciout: 174862306a36Sopenharmony_ci mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci return err; 175162306a36Sopenharmony_ci} 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci/* answer packet, used to send data back for read requests: 175462306a36Sopenharmony_ci * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) 175562306a36Sopenharmony_ci * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) 175662306a36Sopenharmony_ci */ 175762306a36Sopenharmony_ciint drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, 175862306a36Sopenharmony_ci struct drbd_peer_request *peer_req) 175962306a36Sopenharmony_ci{ 176062306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 176162306a36Sopenharmony_ci struct drbd_socket *sock; 176262306a36Sopenharmony_ci struct p_data *p; 176362306a36Sopenharmony_ci int err; 176462306a36Sopenharmony_ci int digest_size; 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ci sock = &peer_device->connection->data; 176762306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci digest_size = peer_device->connection->integrity_tfm ? 177062306a36Sopenharmony_ci crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci if (!p) 177362306a36Sopenharmony_ci return -EIO; 177462306a36Sopenharmony_ci p->sector = cpu_to_be64(peer_req->i.sector); 177562306a36Sopenharmony_ci p->block_id = peer_req->block_id; 177662306a36Sopenharmony_ci p->seq_num = 0; /* unused */ 177762306a36Sopenharmony_ci p->dp_flags = 0; 177862306a36Sopenharmony_ci if (digest_size) 177962306a36Sopenharmony_ci drbd_csum_ee(peer_device->connection->integrity_tfm, peer_req, p + 1); 178062306a36Sopenharmony_ci err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*p) + digest_size, NULL, peer_req->i.size); 178162306a36Sopenharmony_ci if (!err) 178262306a36Sopenharmony_ci err = _drbd_send_zc_ee(peer_device, peer_req); 178362306a36Sopenharmony_ci mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ 178462306a36Sopenharmony_ci 178562306a36Sopenharmony_ci return err; 178662306a36Sopenharmony_ci} 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ciint drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_request *req) 178962306a36Sopenharmony_ci{ 179062306a36Sopenharmony_ci struct drbd_socket *sock; 179162306a36Sopenharmony_ci struct p_block_desc *p; 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci sock = &peer_device->connection->data; 179462306a36Sopenharmony_ci p = drbd_prepare_command(peer_device, sock); 179562306a36Sopenharmony_ci if (!p) 179662306a36Sopenharmony_ci return -EIO; 179762306a36Sopenharmony_ci p->sector = cpu_to_be64(req->i.sector); 179862306a36Sopenharmony_ci p->blksize = cpu_to_be32(req->i.size); 179962306a36Sopenharmony_ci return drbd_send_command(peer_device, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0); 180062306a36Sopenharmony_ci} 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci/* 180362306a36Sopenharmony_ci drbd_send distinguishes two cases: 180462306a36Sopenharmony_ci 180562306a36Sopenharmony_ci Packets sent via the data socket "sock" 180662306a36Sopenharmony_ci and packets sent via the meta data socket "msock" 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci sock msock 180962306a36Sopenharmony_ci -----------------+-------------------------+------------------------------ 181062306a36Sopenharmony_ci timeout conf.timeout / 2 conf.timeout / 2 181162306a36Sopenharmony_ci timeout action send a ping via msock Abort communication 181262306a36Sopenharmony_ci and close all sockets 181362306a36Sopenharmony_ci*/ 181462306a36Sopenharmony_ci 181562306a36Sopenharmony_ci/* 181662306a36Sopenharmony_ci * you must have down()ed the appropriate [m]sock_mutex elsewhere! 181762306a36Sopenharmony_ci */ 181862306a36Sopenharmony_ciint drbd_send(struct drbd_connection *connection, struct socket *sock, 181962306a36Sopenharmony_ci void *buf, size_t size, unsigned msg_flags) 182062306a36Sopenharmony_ci{ 182162306a36Sopenharmony_ci struct kvec iov = {.iov_base = buf, .iov_len = size}; 182262306a36Sopenharmony_ci struct msghdr msg = {.msg_flags = msg_flags | MSG_NOSIGNAL}; 182362306a36Sopenharmony_ci int rv, sent = 0; 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci if (!sock) 182662306a36Sopenharmony_ci return -EBADR; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci /* THINK if (signal_pending) return ... ? */ 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, size); 183162306a36Sopenharmony_ci 183262306a36Sopenharmony_ci if (sock == connection->data.socket) { 183362306a36Sopenharmony_ci rcu_read_lock(); 183462306a36Sopenharmony_ci connection->ko_count = rcu_dereference(connection->net_conf)->ko_count; 183562306a36Sopenharmony_ci rcu_read_unlock(); 183662306a36Sopenharmony_ci drbd_update_congested(connection); 183762306a36Sopenharmony_ci } 183862306a36Sopenharmony_ci do { 183962306a36Sopenharmony_ci rv = sock_sendmsg(sock, &msg); 184062306a36Sopenharmony_ci if (rv == -EAGAIN) { 184162306a36Sopenharmony_ci if (we_should_drop_the_connection(connection, sock)) 184262306a36Sopenharmony_ci break; 184362306a36Sopenharmony_ci else 184462306a36Sopenharmony_ci continue; 184562306a36Sopenharmony_ci } 184662306a36Sopenharmony_ci if (rv == -EINTR) { 184762306a36Sopenharmony_ci flush_signals(current); 184862306a36Sopenharmony_ci rv = 0; 184962306a36Sopenharmony_ci } 185062306a36Sopenharmony_ci if (rv < 0) 185162306a36Sopenharmony_ci break; 185262306a36Sopenharmony_ci sent += rv; 185362306a36Sopenharmony_ci } while (sent < size); 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci if (sock == connection->data.socket) 185662306a36Sopenharmony_ci clear_bit(NET_CONGESTED, &connection->flags); 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_ci if (rv <= 0) { 185962306a36Sopenharmony_ci if (rv != -EAGAIN) { 186062306a36Sopenharmony_ci drbd_err(connection, "%s_sendmsg returned %d\n", 186162306a36Sopenharmony_ci sock == connection->meta.socket ? "msock" : "sock", 186262306a36Sopenharmony_ci rv); 186362306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 186462306a36Sopenharmony_ci } else 186562306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci return sent; 186962306a36Sopenharmony_ci} 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci/* 187262306a36Sopenharmony_ci * drbd_send_all - Send an entire buffer 187362306a36Sopenharmony_ci * 187462306a36Sopenharmony_ci * Returns 0 upon success and a negative error value otherwise. 187562306a36Sopenharmony_ci */ 187662306a36Sopenharmony_ciint drbd_send_all(struct drbd_connection *connection, struct socket *sock, void *buffer, 187762306a36Sopenharmony_ci size_t size, unsigned msg_flags) 187862306a36Sopenharmony_ci{ 187962306a36Sopenharmony_ci int err; 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci err = drbd_send(connection, sock, buffer, size, msg_flags); 188262306a36Sopenharmony_ci if (err < 0) 188362306a36Sopenharmony_ci return err; 188462306a36Sopenharmony_ci if (err != size) 188562306a36Sopenharmony_ci return -EIO; 188662306a36Sopenharmony_ci return 0; 188762306a36Sopenharmony_ci} 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_cistatic int drbd_open(struct gendisk *disk, blk_mode_t mode) 189062306a36Sopenharmony_ci{ 189162306a36Sopenharmony_ci struct drbd_device *device = disk->private_data; 189262306a36Sopenharmony_ci unsigned long flags; 189362306a36Sopenharmony_ci int rv = 0; 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci mutex_lock(&drbd_main_mutex); 189662306a36Sopenharmony_ci spin_lock_irqsave(&device->resource->req_lock, flags); 189762306a36Sopenharmony_ci /* to have a stable device->state.role 189862306a36Sopenharmony_ci * and no race with updating open_cnt */ 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci if (device->state.role != R_PRIMARY) { 190162306a36Sopenharmony_ci if (mode & BLK_OPEN_WRITE) 190262306a36Sopenharmony_ci rv = -EROFS; 190362306a36Sopenharmony_ci else if (!drbd_allow_oos) 190462306a36Sopenharmony_ci rv = -EMEDIUMTYPE; 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci if (!rv) 190862306a36Sopenharmony_ci device->open_cnt++; 190962306a36Sopenharmony_ci spin_unlock_irqrestore(&device->resource->req_lock, flags); 191062306a36Sopenharmony_ci mutex_unlock(&drbd_main_mutex); 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci return rv; 191362306a36Sopenharmony_ci} 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_cistatic void drbd_release(struct gendisk *gd) 191662306a36Sopenharmony_ci{ 191762306a36Sopenharmony_ci struct drbd_device *device = gd->private_data; 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci mutex_lock(&drbd_main_mutex); 192062306a36Sopenharmony_ci device->open_cnt--; 192162306a36Sopenharmony_ci mutex_unlock(&drbd_main_mutex); 192262306a36Sopenharmony_ci} 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci/* need to hold resource->req_lock */ 192562306a36Sopenharmony_civoid drbd_queue_unplug(struct drbd_device *device) 192662306a36Sopenharmony_ci{ 192762306a36Sopenharmony_ci if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) { 192862306a36Sopenharmony_ci D_ASSERT(device, device->state.role == R_PRIMARY); 192962306a36Sopenharmony_ci if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) { 193062306a36Sopenharmony_ci drbd_queue_work_if_unqueued( 193162306a36Sopenharmony_ci &first_peer_device(device)->connection->sender_work, 193262306a36Sopenharmony_ci &device->unplug_work); 193362306a36Sopenharmony_ci } 193462306a36Sopenharmony_ci } 193562306a36Sopenharmony_ci} 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_cistatic void drbd_set_defaults(struct drbd_device *device) 193862306a36Sopenharmony_ci{ 193962306a36Sopenharmony_ci /* Beware! The actual layout differs 194062306a36Sopenharmony_ci * between big endian and little endian */ 194162306a36Sopenharmony_ci device->state = (union drbd_dev_state) { 194262306a36Sopenharmony_ci { .role = R_SECONDARY, 194362306a36Sopenharmony_ci .peer = R_UNKNOWN, 194462306a36Sopenharmony_ci .conn = C_STANDALONE, 194562306a36Sopenharmony_ci .disk = D_DISKLESS, 194662306a36Sopenharmony_ci .pdsk = D_UNKNOWN, 194762306a36Sopenharmony_ci } }; 194862306a36Sopenharmony_ci} 194962306a36Sopenharmony_ci 195062306a36Sopenharmony_civoid drbd_init_set_defaults(struct drbd_device *device) 195162306a36Sopenharmony_ci{ 195262306a36Sopenharmony_ci /* the memset(,0,) did most of this. 195362306a36Sopenharmony_ci * note: only assignments, no allocation in here */ 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_ci drbd_set_defaults(device); 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_ci atomic_set(&device->ap_bio_cnt, 0); 195862306a36Sopenharmony_ci atomic_set(&device->ap_actlog_cnt, 0); 195962306a36Sopenharmony_ci atomic_set(&device->ap_pending_cnt, 0); 196062306a36Sopenharmony_ci atomic_set(&device->rs_pending_cnt, 0); 196162306a36Sopenharmony_ci atomic_set(&device->unacked_cnt, 0); 196262306a36Sopenharmony_ci atomic_set(&device->local_cnt, 0); 196362306a36Sopenharmony_ci atomic_set(&device->pp_in_use_by_net, 0); 196462306a36Sopenharmony_ci atomic_set(&device->rs_sect_in, 0); 196562306a36Sopenharmony_ci atomic_set(&device->rs_sect_ev, 0); 196662306a36Sopenharmony_ci atomic_set(&device->ap_in_flight, 0); 196762306a36Sopenharmony_ci atomic_set(&device->md_io.in_use, 0); 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_ci mutex_init(&device->own_state_mutex); 197062306a36Sopenharmony_ci device->state_mutex = &device->own_state_mutex; 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci spin_lock_init(&device->al_lock); 197362306a36Sopenharmony_ci spin_lock_init(&device->peer_seq_lock); 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci INIT_LIST_HEAD(&device->active_ee); 197662306a36Sopenharmony_ci INIT_LIST_HEAD(&device->sync_ee); 197762306a36Sopenharmony_ci INIT_LIST_HEAD(&device->done_ee); 197862306a36Sopenharmony_ci INIT_LIST_HEAD(&device->read_ee); 197962306a36Sopenharmony_ci INIT_LIST_HEAD(&device->net_ee); 198062306a36Sopenharmony_ci INIT_LIST_HEAD(&device->resync_reads); 198162306a36Sopenharmony_ci INIT_LIST_HEAD(&device->resync_work.list); 198262306a36Sopenharmony_ci INIT_LIST_HEAD(&device->unplug_work.list); 198362306a36Sopenharmony_ci INIT_LIST_HEAD(&device->bm_io_work.w.list); 198462306a36Sopenharmony_ci INIT_LIST_HEAD(&device->pending_master_completion[0]); 198562306a36Sopenharmony_ci INIT_LIST_HEAD(&device->pending_master_completion[1]); 198662306a36Sopenharmony_ci INIT_LIST_HEAD(&device->pending_completion[0]); 198762306a36Sopenharmony_ci INIT_LIST_HEAD(&device->pending_completion[1]); 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci device->resync_work.cb = w_resync_timer; 199062306a36Sopenharmony_ci device->unplug_work.cb = w_send_write_hint; 199162306a36Sopenharmony_ci device->bm_io_work.w.cb = w_bitmap_io; 199262306a36Sopenharmony_ci 199362306a36Sopenharmony_ci timer_setup(&device->resync_timer, resync_timer_fn, 0); 199462306a36Sopenharmony_ci timer_setup(&device->md_sync_timer, md_sync_timer_fn, 0); 199562306a36Sopenharmony_ci timer_setup(&device->start_resync_timer, start_resync_timer_fn, 0); 199662306a36Sopenharmony_ci timer_setup(&device->request_timer, request_timer_fn, 0); 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci init_waitqueue_head(&device->misc_wait); 199962306a36Sopenharmony_ci init_waitqueue_head(&device->state_wait); 200062306a36Sopenharmony_ci init_waitqueue_head(&device->ee_wait); 200162306a36Sopenharmony_ci init_waitqueue_head(&device->al_wait); 200262306a36Sopenharmony_ci init_waitqueue_head(&device->seq_wait); 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci device->resync_wenr = LC_FREE; 200562306a36Sopenharmony_ci device->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; 200662306a36Sopenharmony_ci device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; 200762306a36Sopenharmony_ci} 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_civoid drbd_set_my_capacity(struct drbd_device *device, sector_t size) 201062306a36Sopenharmony_ci{ 201162306a36Sopenharmony_ci char ppb[10]; 201262306a36Sopenharmony_ci 201362306a36Sopenharmony_ci set_capacity_and_notify(device->vdisk, size); 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_ci drbd_info(device, "size = %s (%llu KB)\n", 201662306a36Sopenharmony_ci ppsize(ppb, size>>1), (unsigned long long)size>>1); 201762306a36Sopenharmony_ci} 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_civoid drbd_device_cleanup(struct drbd_device *device) 202062306a36Sopenharmony_ci{ 202162306a36Sopenharmony_ci int i; 202262306a36Sopenharmony_ci if (first_peer_device(device)->connection->receiver.t_state != NONE) 202362306a36Sopenharmony_ci drbd_err(device, "ASSERT FAILED: receiver t_state == %d expected 0.\n", 202462306a36Sopenharmony_ci first_peer_device(device)->connection->receiver.t_state); 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci device->al_writ_cnt = 202762306a36Sopenharmony_ci device->bm_writ_cnt = 202862306a36Sopenharmony_ci device->read_cnt = 202962306a36Sopenharmony_ci device->recv_cnt = 203062306a36Sopenharmony_ci device->send_cnt = 203162306a36Sopenharmony_ci device->writ_cnt = 203262306a36Sopenharmony_ci device->p_size = 203362306a36Sopenharmony_ci device->rs_start = 203462306a36Sopenharmony_ci device->rs_total = 203562306a36Sopenharmony_ci device->rs_failed = 0; 203662306a36Sopenharmony_ci device->rs_last_events = 0; 203762306a36Sopenharmony_ci device->rs_last_sect_ev = 0; 203862306a36Sopenharmony_ci for (i = 0; i < DRBD_SYNC_MARKS; i++) { 203962306a36Sopenharmony_ci device->rs_mark_left[i] = 0; 204062306a36Sopenharmony_ci device->rs_mark_time[i] = 0; 204162306a36Sopenharmony_ci } 204262306a36Sopenharmony_ci D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci set_capacity_and_notify(device->vdisk, 0); 204562306a36Sopenharmony_ci if (device->bitmap) { 204662306a36Sopenharmony_ci /* maybe never allocated. */ 204762306a36Sopenharmony_ci drbd_bm_resize(device, 0, 1); 204862306a36Sopenharmony_ci drbd_bm_cleanup(device); 204962306a36Sopenharmony_ci } 205062306a36Sopenharmony_ci 205162306a36Sopenharmony_ci drbd_backing_dev_free(device, device->ldev); 205262306a36Sopenharmony_ci device->ldev = NULL; 205362306a36Sopenharmony_ci 205462306a36Sopenharmony_ci clear_bit(AL_SUSPENDED, &device->flags); 205562306a36Sopenharmony_ci 205662306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->active_ee)); 205762306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->sync_ee)); 205862306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->done_ee)); 205962306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->read_ee)); 206062306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->net_ee)); 206162306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->resync_reads)); 206262306a36Sopenharmony_ci D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); 206362306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->resync_work.list)); 206462306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->unplug_work.list)); 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_ci drbd_set_defaults(device); 206762306a36Sopenharmony_ci} 206862306a36Sopenharmony_ci 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_cistatic void drbd_destroy_mempools(void) 207162306a36Sopenharmony_ci{ 207262306a36Sopenharmony_ci struct page *page; 207362306a36Sopenharmony_ci 207462306a36Sopenharmony_ci while (drbd_pp_pool) { 207562306a36Sopenharmony_ci page = drbd_pp_pool; 207662306a36Sopenharmony_ci drbd_pp_pool = (struct page *)page_private(page); 207762306a36Sopenharmony_ci __free_page(page); 207862306a36Sopenharmony_ci drbd_pp_vacant--; 207962306a36Sopenharmony_ci } 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci bioset_exit(&drbd_io_bio_set); 208462306a36Sopenharmony_ci bioset_exit(&drbd_md_io_bio_set); 208562306a36Sopenharmony_ci mempool_exit(&drbd_md_io_page_pool); 208662306a36Sopenharmony_ci mempool_exit(&drbd_ee_mempool); 208762306a36Sopenharmony_ci mempool_exit(&drbd_request_mempool); 208862306a36Sopenharmony_ci kmem_cache_destroy(drbd_ee_cache); 208962306a36Sopenharmony_ci kmem_cache_destroy(drbd_request_cache); 209062306a36Sopenharmony_ci kmem_cache_destroy(drbd_bm_ext_cache); 209162306a36Sopenharmony_ci kmem_cache_destroy(drbd_al_ext_cache); 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci drbd_ee_cache = NULL; 209462306a36Sopenharmony_ci drbd_request_cache = NULL; 209562306a36Sopenharmony_ci drbd_bm_ext_cache = NULL; 209662306a36Sopenharmony_ci drbd_al_ext_cache = NULL; 209762306a36Sopenharmony_ci 209862306a36Sopenharmony_ci return; 209962306a36Sopenharmony_ci} 210062306a36Sopenharmony_ci 210162306a36Sopenharmony_cistatic int drbd_create_mempools(void) 210262306a36Sopenharmony_ci{ 210362306a36Sopenharmony_ci struct page *page; 210462306a36Sopenharmony_ci const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; 210562306a36Sopenharmony_ci int i, ret; 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_ci /* caches */ 210862306a36Sopenharmony_ci drbd_request_cache = kmem_cache_create( 210962306a36Sopenharmony_ci "drbd_req", sizeof(struct drbd_request), 0, 0, NULL); 211062306a36Sopenharmony_ci if (drbd_request_cache == NULL) 211162306a36Sopenharmony_ci goto Enomem; 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_ci drbd_ee_cache = kmem_cache_create( 211462306a36Sopenharmony_ci "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL); 211562306a36Sopenharmony_ci if (drbd_ee_cache == NULL) 211662306a36Sopenharmony_ci goto Enomem; 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci drbd_bm_ext_cache = kmem_cache_create( 211962306a36Sopenharmony_ci "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL); 212062306a36Sopenharmony_ci if (drbd_bm_ext_cache == NULL) 212162306a36Sopenharmony_ci goto Enomem; 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_ci drbd_al_ext_cache = kmem_cache_create( 212462306a36Sopenharmony_ci "drbd_al", sizeof(struct lc_element), 0, 0, NULL); 212562306a36Sopenharmony_ci if (drbd_al_ext_cache == NULL) 212662306a36Sopenharmony_ci goto Enomem; 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci /* mempools */ 212962306a36Sopenharmony_ci ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0); 213062306a36Sopenharmony_ci if (ret) 213162306a36Sopenharmony_ci goto Enomem; 213262306a36Sopenharmony_ci 213362306a36Sopenharmony_ci ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0, 213462306a36Sopenharmony_ci BIOSET_NEED_BVECS); 213562306a36Sopenharmony_ci if (ret) 213662306a36Sopenharmony_ci goto Enomem; 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0); 213962306a36Sopenharmony_ci if (ret) 214062306a36Sopenharmony_ci goto Enomem; 214162306a36Sopenharmony_ci 214262306a36Sopenharmony_ci ret = mempool_init_slab_pool(&drbd_request_mempool, number, 214362306a36Sopenharmony_ci drbd_request_cache); 214462306a36Sopenharmony_ci if (ret) 214562306a36Sopenharmony_ci goto Enomem; 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_ci ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache); 214862306a36Sopenharmony_ci if (ret) 214962306a36Sopenharmony_ci goto Enomem; 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ci for (i = 0; i < number; i++) { 215262306a36Sopenharmony_ci page = alloc_page(GFP_HIGHUSER); 215362306a36Sopenharmony_ci if (!page) 215462306a36Sopenharmony_ci goto Enomem; 215562306a36Sopenharmony_ci set_page_private(page, (unsigned long)drbd_pp_pool); 215662306a36Sopenharmony_ci drbd_pp_pool = page; 215762306a36Sopenharmony_ci } 215862306a36Sopenharmony_ci drbd_pp_vacant = number; 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci return 0; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ciEnomem: 216362306a36Sopenharmony_ci drbd_destroy_mempools(); /* in case we allocated some */ 216462306a36Sopenharmony_ci return -ENOMEM; 216562306a36Sopenharmony_ci} 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_cistatic void drbd_release_all_peer_reqs(struct drbd_device *device) 216862306a36Sopenharmony_ci{ 216962306a36Sopenharmony_ci int rr; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci rr = drbd_free_peer_reqs(device, &device->active_ee); 217262306a36Sopenharmony_ci if (rr) 217362306a36Sopenharmony_ci drbd_err(device, "%d EEs in active list found!\n", rr); 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci rr = drbd_free_peer_reqs(device, &device->sync_ee); 217662306a36Sopenharmony_ci if (rr) 217762306a36Sopenharmony_ci drbd_err(device, "%d EEs in sync list found!\n", rr); 217862306a36Sopenharmony_ci 217962306a36Sopenharmony_ci rr = drbd_free_peer_reqs(device, &device->read_ee); 218062306a36Sopenharmony_ci if (rr) 218162306a36Sopenharmony_ci drbd_err(device, "%d EEs in read list found!\n", rr); 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_ci rr = drbd_free_peer_reqs(device, &device->done_ee); 218462306a36Sopenharmony_ci if (rr) 218562306a36Sopenharmony_ci drbd_err(device, "%d EEs in done list found!\n", rr); 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_ci rr = drbd_free_peer_reqs(device, &device->net_ee); 218862306a36Sopenharmony_ci if (rr) 218962306a36Sopenharmony_ci drbd_err(device, "%d EEs in net list found!\n", rr); 219062306a36Sopenharmony_ci} 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_ci/* caution. no locking. */ 219362306a36Sopenharmony_civoid drbd_destroy_device(struct kref *kref) 219462306a36Sopenharmony_ci{ 219562306a36Sopenharmony_ci struct drbd_device *device = container_of(kref, struct drbd_device, kref); 219662306a36Sopenharmony_ci struct drbd_resource *resource = device->resource; 219762306a36Sopenharmony_ci struct drbd_peer_device *peer_device, *tmp_peer_device; 219862306a36Sopenharmony_ci 219962306a36Sopenharmony_ci timer_shutdown_sync(&device->request_timer); 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci /* paranoia asserts */ 220262306a36Sopenharmony_ci D_ASSERT(device, device->open_cnt == 0); 220362306a36Sopenharmony_ci /* end paranoia asserts */ 220462306a36Sopenharmony_ci 220562306a36Sopenharmony_ci /* cleanup stuff that may have been allocated during 220662306a36Sopenharmony_ci * device (re-)configuration or state changes */ 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci drbd_backing_dev_free(device, device->ldev); 220962306a36Sopenharmony_ci device->ldev = NULL; 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci drbd_release_all_peer_reqs(device); 221262306a36Sopenharmony_ci 221362306a36Sopenharmony_ci lc_destroy(device->act_log); 221462306a36Sopenharmony_ci lc_destroy(device->resync); 221562306a36Sopenharmony_ci 221662306a36Sopenharmony_ci kfree(device->p_uuid); 221762306a36Sopenharmony_ci /* device->p_uuid = NULL; */ 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_ci if (device->bitmap) /* should no longer be there. */ 222062306a36Sopenharmony_ci drbd_bm_cleanup(device); 222162306a36Sopenharmony_ci __free_page(device->md_io.page); 222262306a36Sopenharmony_ci put_disk(device->vdisk); 222362306a36Sopenharmony_ci kfree(device->rs_plan_s); 222462306a36Sopenharmony_ci 222562306a36Sopenharmony_ci /* not for_each_connection(connection, resource): 222662306a36Sopenharmony_ci * those may have been cleaned up and disassociated already. 222762306a36Sopenharmony_ci */ 222862306a36Sopenharmony_ci for_each_peer_device_safe(peer_device, tmp_peer_device, device) { 222962306a36Sopenharmony_ci kref_put(&peer_device->connection->kref, drbd_destroy_connection); 223062306a36Sopenharmony_ci kfree(peer_device); 223162306a36Sopenharmony_ci } 223262306a36Sopenharmony_ci if (device->submit.wq) 223362306a36Sopenharmony_ci destroy_workqueue(device->submit.wq); 223462306a36Sopenharmony_ci kfree(device); 223562306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 223662306a36Sopenharmony_ci} 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci/* One global retry thread, if we need to push back some bio and have it 223962306a36Sopenharmony_ci * reinserted through our make request function. 224062306a36Sopenharmony_ci */ 224162306a36Sopenharmony_cistatic struct retry_worker { 224262306a36Sopenharmony_ci struct workqueue_struct *wq; 224362306a36Sopenharmony_ci struct work_struct worker; 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci spinlock_t lock; 224662306a36Sopenharmony_ci struct list_head writes; 224762306a36Sopenharmony_ci} retry; 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_cistatic void do_retry(struct work_struct *ws) 225062306a36Sopenharmony_ci{ 225162306a36Sopenharmony_ci struct retry_worker *retry = container_of(ws, struct retry_worker, worker); 225262306a36Sopenharmony_ci LIST_HEAD(writes); 225362306a36Sopenharmony_ci struct drbd_request *req, *tmp; 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci spin_lock_irq(&retry->lock); 225662306a36Sopenharmony_ci list_splice_init(&retry->writes, &writes); 225762306a36Sopenharmony_ci spin_unlock_irq(&retry->lock); 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci list_for_each_entry_safe(req, tmp, &writes, tl_requests) { 226062306a36Sopenharmony_ci struct drbd_device *device = req->device; 226162306a36Sopenharmony_ci struct bio *bio = req->master_bio; 226262306a36Sopenharmony_ci bool expected; 226362306a36Sopenharmony_ci 226462306a36Sopenharmony_ci expected = 226562306a36Sopenharmony_ci expect(device, atomic_read(&req->completion_ref) == 0) && 226662306a36Sopenharmony_ci expect(device, req->rq_state & RQ_POSTPONED) && 226762306a36Sopenharmony_ci expect(device, (req->rq_state & RQ_LOCAL_PENDING) == 0 || 226862306a36Sopenharmony_ci (req->rq_state & RQ_LOCAL_ABORTED) != 0); 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci if (!expected) 227162306a36Sopenharmony_ci drbd_err(device, "req=%p completion_ref=%d rq_state=%x\n", 227262306a36Sopenharmony_ci req, atomic_read(&req->completion_ref), 227362306a36Sopenharmony_ci req->rq_state); 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci /* We still need to put one kref associated with the 227662306a36Sopenharmony_ci * "completion_ref" going zero in the code path that queued it 227762306a36Sopenharmony_ci * here. The request object may still be referenced by a 227862306a36Sopenharmony_ci * frozen local req->private_bio, in case we force-detached. 227962306a36Sopenharmony_ci */ 228062306a36Sopenharmony_ci kref_put(&req->kref, drbd_req_destroy); 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci /* A single suspended or otherwise blocking device may stall 228362306a36Sopenharmony_ci * all others as well. Fortunately, this code path is to 228462306a36Sopenharmony_ci * recover from a situation that "should not happen": 228562306a36Sopenharmony_ci * concurrent writes in multi-primary setup. 228662306a36Sopenharmony_ci * In a "normal" lifecycle, this workqueue is supposed to be 228762306a36Sopenharmony_ci * destroyed without ever doing anything. 228862306a36Sopenharmony_ci * If it turns out to be an issue anyways, we can do per 228962306a36Sopenharmony_ci * resource (replication group) or per device (minor) retry 229062306a36Sopenharmony_ci * workqueues instead. 229162306a36Sopenharmony_ci */ 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci /* We are not just doing submit_bio_noacct(), 229462306a36Sopenharmony_ci * as we want to keep the start_time information. */ 229562306a36Sopenharmony_ci inc_ap_bio(device); 229662306a36Sopenharmony_ci __drbd_make_request(device, bio); 229762306a36Sopenharmony_ci } 229862306a36Sopenharmony_ci} 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci/* called via drbd_req_put_completion_ref(), 230162306a36Sopenharmony_ci * holds resource->req_lock */ 230262306a36Sopenharmony_civoid drbd_restart_request(struct drbd_request *req) 230362306a36Sopenharmony_ci{ 230462306a36Sopenharmony_ci unsigned long flags; 230562306a36Sopenharmony_ci spin_lock_irqsave(&retry.lock, flags); 230662306a36Sopenharmony_ci list_move_tail(&req->tl_requests, &retry.writes); 230762306a36Sopenharmony_ci spin_unlock_irqrestore(&retry.lock, flags); 230862306a36Sopenharmony_ci 230962306a36Sopenharmony_ci /* Drop the extra reference that would otherwise 231062306a36Sopenharmony_ci * have been dropped by complete_master_bio. 231162306a36Sopenharmony_ci * do_retry() needs to grab a new one. */ 231262306a36Sopenharmony_ci dec_ap_bio(req->device); 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci queue_work(retry.wq, &retry.worker); 231562306a36Sopenharmony_ci} 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_civoid drbd_destroy_resource(struct kref *kref) 231862306a36Sopenharmony_ci{ 231962306a36Sopenharmony_ci struct drbd_resource *resource = 232062306a36Sopenharmony_ci container_of(kref, struct drbd_resource, kref); 232162306a36Sopenharmony_ci 232262306a36Sopenharmony_ci idr_destroy(&resource->devices); 232362306a36Sopenharmony_ci free_cpumask_var(resource->cpu_mask); 232462306a36Sopenharmony_ci kfree(resource->name); 232562306a36Sopenharmony_ci kfree(resource); 232662306a36Sopenharmony_ci} 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_civoid drbd_free_resource(struct drbd_resource *resource) 232962306a36Sopenharmony_ci{ 233062306a36Sopenharmony_ci struct drbd_connection *connection, *tmp; 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_ci for_each_connection_safe(connection, tmp, resource) { 233362306a36Sopenharmony_ci list_del(&connection->connections); 233462306a36Sopenharmony_ci drbd_debugfs_connection_cleanup(connection); 233562306a36Sopenharmony_ci kref_put(&connection->kref, drbd_destroy_connection); 233662306a36Sopenharmony_ci } 233762306a36Sopenharmony_ci drbd_debugfs_resource_cleanup(resource); 233862306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 233962306a36Sopenharmony_ci} 234062306a36Sopenharmony_ci 234162306a36Sopenharmony_cistatic void drbd_cleanup(void) 234262306a36Sopenharmony_ci{ 234362306a36Sopenharmony_ci unsigned int i; 234462306a36Sopenharmony_ci struct drbd_device *device; 234562306a36Sopenharmony_ci struct drbd_resource *resource, *tmp; 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci /* first remove proc, 234862306a36Sopenharmony_ci * drbdsetup uses it's presence to detect 234962306a36Sopenharmony_ci * whether DRBD is loaded. 235062306a36Sopenharmony_ci * If we would get stuck in proc removal, 235162306a36Sopenharmony_ci * but have netlink already deregistered, 235262306a36Sopenharmony_ci * some drbdsetup commands may wait forever 235362306a36Sopenharmony_ci * for an answer. 235462306a36Sopenharmony_ci */ 235562306a36Sopenharmony_ci if (drbd_proc) 235662306a36Sopenharmony_ci remove_proc_entry("drbd", NULL); 235762306a36Sopenharmony_ci 235862306a36Sopenharmony_ci if (retry.wq) 235962306a36Sopenharmony_ci destroy_workqueue(retry.wq); 236062306a36Sopenharmony_ci 236162306a36Sopenharmony_ci drbd_genl_unregister(); 236262306a36Sopenharmony_ci 236362306a36Sopenharmony_ci idr_for_each_entry(&drbd_devices, device, i) 236462306a36Sopenharmony_ci drbd_delete_device(device); 236562306a36Sopenharmony_ci 236662306a36Sopenharmony_ci /* not _rcu since, no other updater anymore. Genl already unregistered */ 236762306a36Sopenharmony_ci for_each_resource_safe(resource, tmp, &drbd_resources) { 236862306a36Sopenharmony_ci list_del(&resource->resources); 236962306a36Sopenharmony_ci drbd_free_resource(resource); 237062306a36Sopenharmony_ci } 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci drbd_debugfs_cleanup(); 237362306a36Sopenharmony_ci 237462306a36Sopenharmony_ci drbd_destroy_mempools(); 237562306a36Sopenharmony_ci unregister_blkdev(DRBD_MAJOR, "drbd"); 237662306a36Sopenharmony_ci 237762306a36Sopenharmony_ci idr_destroy(&drbd_devices); 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_ci pr_info("module cleanup done.\n"); 238062306a36Sopenharmony_ci} 238162306a36Sopenharmony_ci 238262306a36Sopenharmony_cistatic void drbd_init_workqueue(struct drbd_work_queue* wq) 238362306a36Sopenharmony_ci{ 238462306a36Sopenharmony_ci spin_lock_init(&wq->q_lock); 238562306a36Sopenharmony_ci INIT_LIST_HEAD(&wq->q); 238662306a36Sopenharmony_ci init_waitqueue_head(&wq->q_wait); 238762306a36Sopenharmony_ci} 238862306a36Sopenharmony_ci 238962306a36Sopenharmony_cistruct completion_work { 239062306a36Sopenharmony_ci struct drbd_work w; 239162306a36Sopenharmony_ci struct completion done; 239262306a36Sopenharmony_ci}; 239362306a36Sopenharmony_ci 239462306a36Sopenharmony_cistatic int w_complete(struct drbd_work *w, int cancel) 239562306a36Sopenharmony_ci{ 239662306a36Sopenharmony_ci struct completion_work *completion_work = 239762306a36Sopenharmony_ci container_of(w, struct completion_work, w); 239862306a36Sopenharmony_ci 239962306a36Sopenharmony_ci complete(&completion_work->done); 240062306a36Sopenharmony_ci return 0; 240162306a36Sopenharmony_ci} 240262306a36Sopenharmony_ci 240362306a36Sopenharmony_civoid drbd_flush_workqueue(struct drbd_work_queue *work_queue) 240462306a36Sopenharmony_ci{ 240562306a36Sopenharmony_ci struct completion_work completion_work; 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci completion_work.w.cb = w_complete; 240862306a36Sopenharmony_ci init_completion(&completion_work.done); 240962306a36Sopenharmony_ci drbd_queue_work(work_queue, &completion_work.w); 241062306a36Sopenharmony_ci wait_for_completion(&completion_work.done); 241162306a36Sopenharmony_ci} 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_cistruct drbd_resource *drbd_find_resource(const char *name) 241462306a36Sopenharmony_ci{ 241562306a36Sopenharmony_ci struct drbd_resource *resource; 241662306a36Sopenharmony_ci 241762306a36Sopenharmony_ci if (!name || !name[0]) 241862306a36Sopenharmony_ci return NULL; 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci rcu_read_lock(); 242162306a36Sopenharmony_ci for_each_resource_rcu(resource, &drbd_resources) { 242262306a36Sopenharmony_ci if (!strcmp(resource->name, name)) { 242362306a36Sopenharmony_ci kref_get(&resource->kref); 242462306a36Sopenharmony_ci goto found; 242562306a36Sopenharmony_ci } 242662306a36Sopenharmony_ci } 242762306a36Sopenharmony_ci resource = NULL; 242862306a36Sopenharmony_cifound: 242962306a36Sopenharmony_ci rcu_read_unlock(); 243062306a36Sopenharmony_ci return resource; 243162306a36Sopenharmony_ci} 243262306a36Sopenharmony_ci 243362306a36Sopenharmony_cistruct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len, 243462306a36Sopenharmony_ci void *peer_addr, int peer_addr_len) 243562306a36Sopenharmony_ci{ 243662306a36Sopenharmony_ci struct drbd_resource *resource; 243762306a36Sopenharmony_ci struct drbd_connection *connection; 243862306a36Sopenharmony_ci 243962306a36Sopenharmony_ci rcu_read_lock(); 244062306a36Sopenharmony_ci for_each_resource_rcu(resource, &drbd_resources) { 244162306a36Sopenharmony_ci for_each_connection_rcu(connection, resource) { 244262306a36Sopenharmony_ci if (connection->my_addr_len == my_addr_len && 244362306a36Sopenharmony_ci connection->peer_addr_len == peer_addr_len && 244462306a36Sopenharmony_ci !memcmp(&connection->my_addr, my_addr, my_addr_len) && 244562306a36Sopenharmony_ci !memcmp(&connection->peer_addr, peer_addr, peer_addr_len)) { 244662306a36Sopenharmony_ci kref_get(&connection->kref); 244762306a36Sopenharmony_ci goto found; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci } 245062306a36Sopenharmony_ci } 245162306a36Sopenharmony_ci connection = NULL; 245262306a36Sopenharmony_cifound: 245362306a36Sopenharmony_ci rcu_read_unlock(); 245462306a36Sopenharmony_ci return connection; 245562306a36Sopenharmony_ci} 245662306a36Sopenharmony_ci 245762306a36Sopenharmony_cistatic int drbd_alloc_socket(struct drbd_socket *socket) 245862306a36Sopenharmony_ci{ 245962306a36Sopenharmony_ci socket->rbuf = (void *) __get_free_page(GFP_KERNEL); 246062306a36Sopenharmony_ci if (!socket->rbuf) 246162306a36Sopenharmony_ci return -ENOMEM; 246262306a36Sopenharmony_ci socket->sbuf = (void *) __get_free_page(GFP_KERNEL); 246362306a36Sopenharmony_ci if (!socket->sbuf) 246462306a36Sopenharmony_ci return -ENOMEM; 246562306a36Sopenharmony_ci return 0; 246662306a36Sopenharmony_ci} 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_cistatic void drbd_free_socket(struct drbd_socket *socket) 246962306a36Sopenharmony_ci{ 247062306a36Sopenharmony_ci free_page((unsigned long) socket->sbuf); 247162306a36Sopenharmony_ci free_page((unsigned long) socket->rbuf); 247262306a36Sopenharmony_ci} 247362306a36Sopenharmony_ci 247462306a36Sopenharmony_civoid conn_free_crypto(struct drbd_connection *connection) 247562306a36Sopenharmony_ci{ 247662306a36Sopenharmony_ci drbd_free_sock(connection); 247762306a36Sopenharmony_ci 247862306a36Sopenharmony_ci crypto_free_shash(connection->csums_tfm); 247962306a36Sopenharmony_ci crypto_free_shash(connection->verify_tfm); 248062306a36Sopenharmony_ci crypto_free_shash(connection->cram_hmac_tfm); 248162306a36Sopenharmony_ci crypto_free_shash(connection->integrity_tfm); 248262306a36Sopenharmony_ci crypto_free_shash(connection->peer_integrity_tfm); 248362306a36Sopenharmony_ci kfree(connection->int_dig_in); 248462306a36Sopenharmony_ci kfree(connection->int_dig_vv); 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci connection->csums_tfm = NULL; 248762306a36Sopenharmony_ci connection->verify_tfm = NULL; 248862306a36Sopenharmony_ci connection->cram_hmac_tfm = NULL; 248962306a36Sopenharmony_ci connection->integrity_tfm = NULL; 249062306a36Sopenharmony_ci connection->peer_integrity_tfm = NULL; 249162306a36Sopenharmony_ci connection->int_dig_in = NULL; 249262306a36Sopenharmony_ci connection->int_dig_vv = NULL; 249362306a36Sopenharmony_ci} 249462306a36Sopenharmony_ci 249562306a36Sopenharmony_ciint set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts) 249662306a36Sopenharmony_ci{ 249762306a36Sopenharmony_ci struct drbd_connection *connection; 249862306a36Sopenharmony_ci cpumask_var_t new_cpu_mask; 249962306a36Sopenharmony_ci int err; 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) 250262306a36Sopenharmony_ci return -ENOMEM; 250362306a36Sopenharmony_ci 250462306a36Sopenharmony_ci /* silently ignore cpu mask on UP kernel */ 250562306a36Sopenharmony_ci if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { 250662306a36Sopenharmony_ci err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, 250762306a36Sopenharmony_ci cpumask_bits(new_cpu_mask), nr_cpu_ids); 250862306a36Sopenharmony_ci if (err == -EOVERFLOW) { 250962306a36Sopenharmony_ci /* So what. mask it out. */ 251062306a36Sopenharmony_ci cpumask_var_t tmp_cpu_mask; 251162306a36Sopenharmony_ci if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) { 251262306a36Sopenharmony_ci cpumask_setall(tmp_cpu_mask); 251362306a36Sopenharmony_ci cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask); 251462306a36Sopenharmony_ci drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n", 251562306a36Sopenharmony_ci res_opts->cpu_mask, 251662306a36Sopenharmony_ci strlen(res_opts->cpu_mask) > 12 ? "..." : "", 251762306a36Sopenharmony_ci nr_cpu_ids); 251862306a36Sopenharmony_ci free_cpumask_var(tmp_cpu_mask); 251962306a36Sopenharmony_ci err = 0; 252062306a36Sopenharmony_ci } 252162306a36Sopenharmony_ci } 252262306a36Sopenharmony_ci if (err) { 252362306a36Sopenharmony_ci drbd_warn(resource, "bitmap_parse() failed with %d\n", err); 252462306a36Sopenharmony_ci /* retcode = ERR_CPU_MASK_PARSE; */ 252562306a36Sopenharmony_ci goto fail; 252662306a36Sopenharmony_ci } 252762306a36Sopenharmony_ci } 252862306a36Sopenharmony_ci resource->res_opts = *res_opts; 252962306a36Sopenharmony_ci if (cpumask_empty(new_cpu_mask)) 253062306a36Sopenharmony_ci drbd_calc_cpu_mask(&new_cpu_mask); 253162306a36Sopenharmony_ci if (!cpumask_equal(resource->cpu_mask, new_cpu_mask)) { 253262306a36Sopenharmony_ci cpumask_copy(resource->cpu_mask, new_cpu_mask); 253362306a36Sopenharmony_ci for_each_connection_rcu(connection, resource) { 253462306a36Sopenharmony_ci connection->receiver.reset_cpu_mask = 1; 253562306a36Sopenharmony_ci connection->ack_receiver.reset_cpu_mask = 1; 253662306a36Sopenharmony_ci connection->worker.reset_cpu_mask = 1; 253762306a36Sopenharmony_ci } 253862306a36Sopenharmony_ci } 253962306a36Sopenharmony_ci err = 0; 254062306a36Sopenharmony_ci 254162306a36Sopenharmony_cifail: 254262306a36Sopenharmony_ci free_cpumask_var(new_cpu_mask); 254362306a36Sopenharmony_ci return err; 254462306a36Sopenharmony_ci 254562306a36Sopenharmony_ci} 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_cistruct drbd_resource *drbd_create_resource(const char *name) 254862306a36Sopenharmony_ci{ 254962306a36Sopenharmony_ci struct drbd_resource *resource; 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci resource = kzalloc(sizeof(struct drbd_resource), GFP_KERNEL); 255262306a36Sopenharmony_ci if (!resource) 255362306a36Sopenharmony_ci goto fail; 255462306a36Sopenharmony_ci resource->name = kstrdup(name, GFP_KERNEL); 255562306a36Sopenharmony_ci if (!resource->name) 255662306a36Sopenharmony_ci goto fail_free_resource; 255762306a36Sopenharmony_ci if (!zalloc_cpumask_var(&resource->cpu_mask, GFP_KERNEL)) 255862306a36Sopenharmony_ci goto fail_free_name; 255962306a36Sopenharmony_ci kref_init(&resource->kref); 256062306a36Sopenharmony_ci idr_init(&resource->devices); 256162306a36Sopenharmony_ci INIT_LIST_HEAD(&resource->connections); 256262306a36Sopenharmony_ci resource->write_ordering = WO_BDEV_FLUSH; 256362306a36Sopenharmony_ci list_add_tail_rcu(&resource->resources, &drbd_resources); 256462306a36Sopenharmony_ci mutex_init(&resource->conf_update); 256562306a36Sopenharmony_ci mutex_init(&resource->adm_mutex); 256662306a36Sopenharmony_ci spin_lock_init(&resource->req_lock); 256762306a36Sopenharmony_ci drbd_debugfs_resource_add(resource); 256862306a36Sopenharmony_ci return resource; 256962306a36Sopenharmony_ci 257062306a36Sopenharmony_cifail_free_name: 257162306a36Sopenharmony_ci kfree(resource->name); 257262306a36Sopenharmony_cifail_free_resource: 257362306a36Sopenharmony_ci kfree(resource); 257462306a36Sopenharmony_cifail: 257562306a36Sopenharmony_ci return NULL; 257662306a36Sopenharmony_ci} 257762306a36Sopenharmony_ci 257862306a36Sopenharmony_ci/* caller must be under adm_mutex */ 257962306a36Sopenharmony_cistruct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) 258062306a36Sopenharmony_ci{ 258162306a36Sopenharmony_ci struct drbd_resource *resource; 258262306a36Sopenharmony_ci struct drbd_connection *connection; 258362306a36Sopenharmony_ci 258462306a36Sopenharmony_ci connection = kzalloc(sizeof(struct drbd_connection), GFP_KERNEL); 258562306a36Sopenharmony_ci if (!connection) 258662306a36Sopenharmony_ci return NULL; 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci if (drbd_alloc_socket(&connection->data)) 258962306a36Sopenharmony_ci goto fail; 259062306a36Sopenharmony_ci if (drbd_alloc_socket(&connection->meta)) 259162306a36Sopenharmony_ci goto fail; 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci connection->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); 259462306a36Sopenharmony_ci if (!connection->current_epoch) 259562306a36Sopenharmony_ci goto fail; 259662306a36Sopenharmony_ci 259762306a36Sopenharmony_ci INIT_LIST_HEAD(&connection->transfer_log); 259862306a36Sopenharmony_ci 259962306a36Sopenharmony_ci INIT_LIST_HEAD(&connection->current_epoch->list); 260062306a36Sopenharmony_ci connection->epochs = 1; 260162306a36Sopenharmony_ci spin_lock_init(&connection->epoch_lock); 260262306a36Sopenharmony_ci 260362306a36Sopenharmony_ci connection->send.seen_any_write_yet = false; 260462306a36Sopenharmony_ci connection->send.current_epoch_nr = 0; 260562306a36Sopenharmony_ci connection->send.current_epoch_writes = 0; 260662306a36Sopenharmony_ci 260762306a36Sopenharmony_ci resource = drbd_create_resource(name); 260862306a36Sopenharmony_ci if (!resource) 260962306a36Sopenharmony_ci goto fail; 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_ci connection->cstate = C_STANDALONE; 261262306a36Sopenharmony_ci mutex_init(&connection->cstate_mutex); 261362306a36Sopenharmony_ci init_waitqueue_head(&connection->ping_wait); 261462306a36Sopenharmony_ci idr_init(&connection->peer_devices); 261562306a36Sopenharmony_ci 261662306a36Sopenharmony_ci drbd_init_workqueue(&connection->sender_work); 261762306a36Sopenharmony_ci mutex_init(&connection->data.mutex); 261862306a36Sopenharmony_ci mutex_init(&connection->meta.mutex); 261962306a36Sopenharmony_ci 262062306a36Sopenharmony_ci drbd_thread_init(resource, &connection->receiver, drbd_receiver, "receiver"); 262162306a36Sopenharmony_ci connection->receiver.connection = connection; 262262306a36Sopenharmony_ci drbd_thread_init(resource, &connection->worker, drbd_worker, "worker"); 262362306a36Sopenharmony_ci connection->worker.connection = connection; 262462306a36Sopenharmony_ci drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv"); 262562306a36Sopenharmony_ci connection->ack_receiver.connection = connection; 262662306a36Sopenharmony_ci 262762306a36Sopenharmony_ci kref_init(&connection->kref); 262862306a36Sopenharmony_ci 262962306a36Sopenharmony_ci connection->resource = resource; 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_ci if (set_resource_options(resource, res_opts)) 263262306a36Sopenharmony_ci goto fail_resource; 263362306a36Sopenharmony_ci 263462306a36Sopenharmony_ci kref_get(&resource->kref); 263562306a36Sopenharmony_ci list_add_tail_rcu(&connection->connections, &resource->connections); 263662306a36Sopenharmony_ci drbd_debugfs_connection_add(connection); 263762306a36Sopenharmony_ci return connection; 263862306a36Sopenharmony_ci 263962306a36Sopenharmony_cifail_resource: 264062306a36Sopenharmony_ci list_del(&resource->resources); 264162306a36Sopenharmony_ci drbd_free_resource(resource); 264262306a36Sopenharmony_cifail: 264362306a36Sopenharmony_ci kfree(connection->current_epoch); 264462306a36Sopenharmony_ci drbd_free_socket(&connection->meta); 264562306a36Sopenharmony_ci drbd_free_socket(&connection->data); 264662306a36Sopenharmony_ci kfree(connection); 264762306a36Sopenharmony_ci return NULL; 264862306a36Sopenharmony_ci} 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_civoid drbd_destroy_connection(struct kref *kref) 265162306a36Sopenharmony_ci{ 265262306a36Sopenharmony_ci struct drbd_connection *connection = container_of(kref, struct drbd_connection, kref); 265362306a36Sopenharmony_ci struct drbd_resource *resource = connection->resource; 265462306a36Sopenharmony_ci 265562306a36Sopenharmony_ci if (atomic_read(&connection->current_epoch->epoch_size) != 0) 265662306a36Sopenharmony_ci drbd_err(connection, "epoch_size:%d\n", atomic_read(&connection->current_epoch->epoch_size)); 265762306a36Sopenharmony_ci kfree(connection->current_epoch); 265862306a36Sopenharmony_ci 265962306a36Sopenharmony_ci idr_destroy(&connection->peer_devices); 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci drbd_free_socket(&connection->meta); 266262306a36Sopenharmony_ci drbd_free_socket(&connection->data); 266362306a36Sopenharmony_ci kfree(connection->int_dig_in); 266462306a36Sopenharmony_ci kfree(connection->int_dig_vv); 266562306a36Sopenharmony_ci kfree(connection); 266662306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 266762306a36Sopenharmony_ci} 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_cistatic int init_submitter(struct drbd_device *device) 267062306a36Sopenharmony_ci{ 267162306a36Sopenharmony_ci /* opencoded create_singlethread_workqueue(), 267262306a36Sopenharmony_ci * to be able to say "drbd%d", ..., minor */ 267362306a36Sopenharmony_ci device->submit.wq = 267462306a36Sopenharmony_ci alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor); 267562306a36Sopenharmony_ci if (!device->submit.wq) 267662306a36Sopenharmony_ci return -ENOMEM; 267762306a36Sopenharmony_ci 267862306a36Sopenharmony_ci INIT_WORK(&device->submit.worker, do_submit); 267962306a36Sopenharmony_ci INIT_LIST_HEAD(&device->submit.writes); 268062306a36Sopenharmony_ci return 0; 268162306a36Sopenharmony_ci} 268262306a36Sopenharmony_ci 268362306a36Sopenharmony_cienum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) 268462306a36Sopenharmony_ci{ 268562306a36Sopenharmony_ci struct drbd_resource *resource = adm_ctx->resource; 268662306a36Sopenharmony_ci struct drbd_connection *connection, *n; 268762306a36Sopenharmony_ci struct drbd_device *device; 268862306a36Sopenharmony_ci struct drbd_peer_device *peer_device, *tmp_peer_device; 268962306a36Sopenharmony_ci struct gendisk *disk; 269062306a36Sopenharmony_ci int id; 269162306a36Sopenharmony_ci int vnr = adm_ctx->volume; 269262306a36Sopenharmony_ci enum drbd_ret_code err = ERR_NOMEM; 269362306a36Sopenharmony_ci 269462306a36Sopenharmony_ci device = minor_to_device(minor); 269562306a36Sopenharmony_ci if (device) 269662306a36Sopenharmony_ci return ERR_MINOR_OR_VOLUME_EXISTS; 269762306a36Sopenharmony_ci 269862306a36Sopenharmony_ci /* GFP_KERNEL, we are outside of all write-out paths */ 269962306a36Sopenharmony_ci device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL); 270062306a36Sopenharmony_ci if (!device) 270162306a36Sopenharmony_ci return ERR_NOMEM; 270262306a36Sopenharmony_ci kref_init(&device->kref); 270362306a36Sopenharmony_ci 270462306a36Sopenharmony_ci kref_get(&resource->kref); 270562306a36Sopenharmony_ci device->resource = resource; 270662306a36Sopenharmony_ci device->minor = minor; 270762306a36Sopenharmony_ci device->vnr = vnr; 270862306a36Sopenharmony_ci 270962306a36Sopenharmony_ci drbd_init_set_defaults(device); 271062306a36Sopenharmony_ci 271162306a36Sopenharmony_ci disk = blk_alloc_disk(NUMA_NO_NODE); 271262306a36Sopenharmony_ci if (!disk) 271362306a36Sopenharmony_ci goto out_no_disk; 271462306a36Sopenharmony_ci 271562306a36Sopenharmony_ci device->vdisk = disk; 271662306a36Sopenharmony_ci device->rq_queue = disk->queue; 271762306a36Sopenharmony_ci 271862306a36Sopenharmony_ci set_disk_ro(disk, true); 271962306a36Sopenharmony_ci 272062306a36Sopenharmony_ci disk->major = DRBD_MAJOR; 272162306a36Sopenharmony_ci disk->first_minor = minor; 272262306a36Sopenharmony_ci disk->minors = 1; 272362306a36Sopenharmony_ci disk->fops = &drbd_ops; 272462306a36Sopenharmony_ci disk->flags |= GENHD_FL_NO_PART; 272562306a36Sopenharmony_ci sprintf(disk->disk_name, "drbd%d", minor); 272662306a36Sopenharmony_ci disk->private_data = device; 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); 272962306a36Sopenharmony_ci blk_queue_write_cache(disk->queue, true, true); 273062306a36Sopenharmony_ci /* Setting the max_hw_sectors to an odd value of 8kibyte here 273162306a36Sopenharmony_ci This triggers a max_bio_size message upon first attach or connect */ 273262306a36Sopenharmony_ci blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); 273362306a36Sopenharmony_ci 273462306a36Sopenharmony_ci device->md_io.page = alloc_page(GFP_KERNEL); 273562306a36Sopenharmony_ci if (!device->md_io.page) 273662306a36Sopenharmony_ci goto out_no_io_page; 273762306a36Sopenharmony_ci 273862306a36Sopenharmony_ci if (drbd_bm_init(device)) 273962306a36Sopenharmony_ci goto out_no_bitmap; 274062306a36Sopenharmony_ci device->read_requests = RB_ROOT; 274162306a36Sopenharmony_ci device->write_requests = RB_ROOT; 274262306a36Sopenharmony_ci 274362306a36Sopenharmony_ci id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL); 274462306a36Sopenharmony_ci if (id < 0) { 274562306a36Sopenharmony_ci if (id == -ENOSPC) 274662306a36Sopenharmony_ci err = ERR_MINOR_OR_VOLUME_EXISTS; 274762306a36Sopenharmony_ci goto out_no_minor_idr; 274862306a36Sopenharmony_ci } 274962306a36Sopenharmony_ci kref_get(&device->kref); 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL); 275262306a36Sopenharmony_ci if (id < 0) { 275362306a36Sopenharmony_ci if (id == -ENOSPC) 275462306a36Sopenharmony_ci err = ERR_MINOR_OR_VOLUME_EXISTS; 275562306a36Sopenharmony_ci goto out_idr_remove_minor; 275662306a36Sopenharmony_ci } 275762306a36Sopenharmony_ci kref_get(&device->kref); 275862306a36Sopenharmony_ci 275962306a36Sopenharmony_ci INIT_LIST_HEAD(&device->peer_devices); 276062306a36Sopenharmony_ci INIT_LIST_HEAD(&device->pending_bitmap_io); 276162306a36Sopenharmony_ci for_each_connection(connection, resource) { 276262306a36Sopenharmony_ci peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); 276362306a36Sopenharmony_ci if (!peer_device) 276462306a36Sopenharmony_ci goto out_idr_remove_from_resource; 276562306a36Sopenharmony_ci peer_device->connection = connection; 276662306a36Sopenharmony_ci peer_device->device = device; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci list_add(&peer_device->peer_devices, &device->peer_devices); 276962306a36Sopenharmony_ci kref_get(&device->kref); 277062306a36Sopenharmony_ci 277162306a36Sopenharmony_ci id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL); 277262306a36Sopenharmony_ci if (id < 0) { 277362306a36Sopenharmony_ci if (id == -ENOSPC) 277462306a36Sopenharmony_ci err = ERR_INVALID_REQUEST; 277562306a36Sopenharmony_ci goto out_idr_remove_from_resource; 277662306a36Sopenharmony_ci } 277762306a36Sopenharmony_ci kref_get(&connection->kref); 277862306a36Sopenharmony_ci INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf); 277962306a36Sopenharmony_ci } 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci if (init_submitter(device)) { 278262306a36Sopenharmony_ci err = ERR_NOMEM; 278362306a36Sopenharmony_ci goto out_idr_remove_from_resource; 278462306a36Sopenharmony_ci } 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_ci err = add_disk(disk); 278762306a36Sopenharmony_ci if (err) 278862306a36Sopenharmony_ci goto out_destroy_workqueue; 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci /* inherit the connection state */ 279162306a36Sopenharmony_ci device->state.conn = first_connection(resource)->cstate; 279262306a36Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) { 279362306a36Sopenharmony_ci for_each_peer_device(peer_device, device) 279462306a36Sopenharmony_ci drbd_connected(peer_device); 279562306a36Sopenharmony_ci } 279662306a36Sopenharmony_ci /* move to create_peer_device() */ 279762306a36Sopenharmony_ci for_each_peer_device(peer_device, device) 279862306a36Sopenharmony_ci drbd_debugfs_peer_device_add(peer_device); 279962306a36Sopenharmony_ci drbd_debugfs_device_add(device); 280062306a36Sopenharmony_ci return NO_ERROR; 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_ciout_destroy_workqueue: 280362306a36Sopenharmony_ci destroy_workqueue(device->submit.wq); 280462306a36Sopenharmony_ciout_idr_remove_from_resource: 280562306a36Sopenharmony_ci for_each_connection_safe(connection, n, resource) { 280662306a36Sopenharmony_ci peer_device = idr_remove(&connection->peer_devices, vnr); 280762306a36Sopenharmony_ci if (peer_device) 280862306a36Sopenharmony_ci kref_put(&connection->kref, drbd_destroy_connection); 280962306a36Sopenharmony_ci } 281062306a36Sopenharmony_ci for_each_peer_device_safe(peer_device, tmp_peer_device, device) { 281162306a36Sopenharmony_ci list_del(&peer_device->peer_devices); 281262306a36Sopenharmony_ci kfree(peer_device); 281362306a36Sopenharmony_ci } 281462306a36Sopenharmony_ci idr_remove(&resource->devices, vnr); 281562306a36Sopenharmony_ciout_idr_remove_minor: 281662306a36Sopenharmony_ci idr_remove(&drbd_devices, minor); 281762306a36Sopenharmony_ci synchronize_rcu(); 281862306a36Sopenharmony_ciout_no_minor_idr: 281962306a36Sopenharmony_ci drbd_bm_cleanup(device); 282062306a36Sopenharmony_ciout_no_bitmap: 282162306a36Sopenharmony_ci __free_page(device->md_io.page); 282262306a36Sopenharmony_ciout_no_io_page: 282362306a36Sopenharmony_ci put_disk(disk); 282462306a36Sopenharmony_ciout_no_disk: 282562306a36Sopenharmony_ci kref_put(&resource->kref, drbd_destroy_resource); 282662306a36Sopenharmony_ci kfree(device); 282762306a36Sopenharmony_ci return err; 282862306a36Sopenharmony_ci} 282962306a36Sopenharmony_ci 283062306a36Sopenharmony_civoid drbd_delete_device(struct drbd_device *device) 283162306a36Sopenharmony_ci{ 283262306a36Sopenharmony_ci struct drbd_resource *resource = device->resource; 283362306a36Sopenharmony_ci struct drbd_connection *connection; 283462306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 283562306a36Sopenharmony_ci 283662306a36Sopenharmony_ci /* move to free_peer_device() */ 283762306a36Sopenharmony_ci for_each_peer_device(peer_device, device) 283862306a36Sopenharmony_ci drbd_debugfs_peer_device_cleanup(peer_device); 283962306a36Sopenharmony_ci drbd_debugfs_device_cleanup(device); 284062306a36Sopenharmony_ci for_each_connection(connection, resource) { 284162306a36Sopenharmony_ci idr_remove(&connection->peer_devices, device->vnr); 284262306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 284362306a36Sopenharmony_ci } 284462306a36Sopenharmony_ci idr_remove(&resource->devices, device->vnr); 284562306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 284662306a36Sopenharmony_ci idr_remove(&drbd_devices, device_to_minor(device)); 284762306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 284862306a36Sopenharmony_ci del_gendisk(device->vdisk); 284962306a36Sopenharmony_ci synchronize_rcu(); 285062306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 285162306a36Sopenharmony_ci} 285262306a36Sopenharmony_ci 285362306a36Sopenharmony_cistatic int __init drbd_init(void) 285462306a36Sopenharmony_ci{ 285562306a36Sopenharmony_ci int err; 285662306a36Sopenharmony_ci 285762306a36Sopenharmony_ci if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) { 285862306a36Sopenharmony_ci pr_err("invalid minor_count (%d)\n", drbd_minor_count); 285962306a36Sopenharmony_ci#ifdef MODULE 286062306a36Sopenharmony_ci return -EINVAL; 286162306a36Sopenharmony_ci#else 286262306a36Sopenharmony_ci drbd_minor_count = DRBD_MINOR_COUNT_DEF; 286362306a36Sopenharmony_ci#endif 286462306a36Sopenharmony_ci } 286562306a36Sopenharmony_ci 286662306a36Sopenharmony_ci err = register_blkdev(DRBD_MAJOR, "drbd"); 286762306a36Sopenharmony_ci if (err) { 286862306a36Sopenharmony_ci pr_err("unable to register block device major %d\n", 286962306a36Sopenharmony_ci DRBD_MAJOR); 287062306a36Sopenharmony_ci return err; 287162306a36Sopenharmony_ci } 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci /* 287462306a36Sopenharmony_ci * allocate all necessary structs 287562306a36Sopenharmony_ci */ 287662306a36Sopenharmony_ci init_waitqueue_head(&drbd_pp_wait); 287762306a36Sopenharmony_ci 287862306a36Sopenharmony_ci drbd_proc = NULL; /* play safe for drbd_cleanup */ 287962306a36Sopenharmony_ci idr_init(&drbd_devices); 288062306a36Sopenharmony_ci 288162306a36Sopenharmony_ci mutex_init(&resources_mutex); 288262306a36Sopenharmony_ci INIT_LIST_HEAD(&drbd_resources); 288362306a36Sopenharmony_ci 288462306a36Sopenharmony_ci err = drbd_genl_register(); 288562306a36Sopenharmony_ci if (err) { 288662306a36Sopenharmony_ci pr_err("unable to register generic netlink family\n"); 288762306a36Sopenharmony_ci goto fail; 288862306a36Sopenharmony_ci } 288962306a36Sopenharmony_ci 289062306a36Sopenharmony_ci err = drbd_create_mempools(); 289162306a36Sopenharmony_ci if (err) 289262306a36Sopenharmony_ci goto fail; 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ci err = -ENOMEM; 289562306a36Sopenharmony_ci drbd_proc = proc_create_single("drbd", S_IFREG | 0444 , NULL, drbd_seq_show); 289662306a36Sopenharmony_ci if (!drbd_proc) { 289762306a36Sopenharmony_ci pr_err("unable to register proc file\n"); 289862306a36Sopenharmony_ci goto fail; 289962306a36Sopenharmony_ci } 290062306a36Sopenharmony_ci 290162306a36Sopenharmony_ci retry.wq = create_singlethread_workqueue("drbd-reissue"); 290262306a36Sopenharmony_ci if (!retry.wq) { 290362306a36Sopenharmony_ci pr_err("unable to create retry workqueue\n"); 290462306a36Sopenharmony_ci goto fail; 290562306a36Sopenharmony_ci } 290662306a36Sopenharmony_ci INIT_WORK(&retry.worker, do_retry); 290762306a36Sopenharmony_ci spin_lock_init(&retry.lock); 290862306a36Sopenharmony_ci INIT_LIST_HEAD(&retry.writes); 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_ci drbd_debugfs_init(); 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_ci pr_info("initialized. " 291362306a36Sopenharmony_ci "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", 291462306a36Sopenharmony_ci GENL_MAGIC_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); 291562306a36Sopenharmony_ci pr_info("%s\n", drbd_buildtag()); 291662306a36Sopenharmony_ci pr_info("registered as block device major %d\n", DRBD_MAJOR); 291762306a36Sopenharmony_ci return 0; /* Success! */ 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_cifail: 292062306a36Sopenharmony_ci drbd_cleanup(); 292162306a36Sopenharmony_ci if (err == -ENOMEM) 292262306a36Sopenharmony_ci pr_err("ran out of memory\n"); 292362306a36Sopenharmony_ci else 292462306a36Sopenharmony_ci pr_err("initialization failure\n"); 292562306a36Sopenharmony_ci return err; 292662306a36Sopenharmony_ci} 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_cistatic void drbd_free_one_sock(struct drbd_socket *ds) 292962306a36Sopenharmony_ci{ 293062306a36Sopenharmony_ci struct socket *s; 293162306a36Sopenharmony_ci mutex_lock(&ds->mutex); 293262306a36Sopenharmony_ci s = ds->socket; 293362306a36Sopenharmony_ci ds->socket = NULL; 293462306a36Sopenharmony_ci mutex_unlock(&ds->mutex); 293562306a36Sopenharmony_ci if (s) { 293662306a36Sopenharmony_ci /* so debugfs does not need to mutex_lock() */ 293762306a36Sopenharmony_ci synchronize_rcu(); 293862306a36Sopenharmony_ci kernel_sock_shutdown(s, SHUT_RDWR); 293962306a36Sopenharmony_ci sock_release(s); 294062306a36Sopenharmony_ci } 294162306a36Sopenharmony_ci} 294262306a36Sopenharmony_ci 294362306a36Sopenharmony_civoid drbd_free_sock(struct drbd_connection *connection) 294462306a36Sopenharmony_ci{ 294562306a36Sopenharmony_ci if (connection->data.socket) 294662306a36Sopenharmony_ci drbd_free_one_sock(&connection->data); 294762306a36Sopenharmony_ci if (connection->meta.socket) 294862306a36Sopenharmony_ci drbd_free_one_sock(&connection->meta); 294962306a36Sopenharmony_ci} 295062306a36Sopenharmony_ci 295162306a36Sopenharmony_ci/* meta data management */ 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_civoid conn_md_sync(struct drbd_connection *connection) 295462306a36Sopenharmony_ci{ 295562306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 295662306a36Sopenharmony_ci int vnr; 295762306a36Sopenharmony_ci 295862306a36Sopenharmony_ci rcu_read_lock(); 295962306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 296062306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 296162306a36Sopenharmony_ci 296262306a36Sopenharmony_ci kref_get(&device->kref); 296362306a36Sopenharmony_ci rcu_read_unlock(); 296462306a36Sopenharmony_ci drbd_md_sync(device); 296562306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 296662306a36Sopenharmony_ci rcu_read_lock(); 296762306a36Sopenharmony_ci } 296862306a36Sopenharmony_ci rcu_read_unlock(); 296962306a36Sopenharmony_ci} 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_ci/* aligned 4kByte */ 297262306a36Sopenharmony_cistruct meta_data_on_disk { 297362306a36Sopenharmony_ci u64 la_size_sect; /* last agreed size. */ 297462306a36Sopenharmony_ci u64 uuid[UI_SIZE]; /* UUIDs. */ 297562306a36Sopenharmony_ci u64 device_uuid; 297662306a36Sopenharmony_ci u64 reserved_u64_1; 297762306a36Sopenharmony_ci u32 flags; /* MDF */ 297862306a36Sopenharmony_ci u32 magic; 297962306a36Sopenharmony_ci u32 md_size_sect; 298062306a36Sopenharmony_ci u32 al_offset; /* offset to this block */ 298162306a36Sopenharmony_ci u32 al_nr_extents; /* important for restoring the AL (userspace) */ 298262306a36Sopenharmony_ci /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ 298362306a36Sopenharmony_ci u32 bm_offset; /* offset to the bitmap, from here */ 298462306a36Sopenharmony_ci u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ 298562306a36Sopenharmony_ci u32 la_peer_max_bio_size; /* last peer max_bio_size */ 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci /* see al_tr_number_to_on_disk_sector() */ 298862306a36Sopenharmony_ci u32 al_stripes; 298962306a36Sopenharmony_ci u32 al_stripe_size_4k; 299062306a36Sopenharmony_ci 299162306a36Sopenharmony_ci u8 reserved_u8[4096 - (7*8 + 10*4)]; 299262306a36Sopenharmony_ci} __packed; 299362306a36Sopenharmony_ci 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_civoid drbd_md_write(struct drbd_device *device, void *b) 299762306a36Sopenharmony_ci{ 299862306a36Sopenharmony_ci struct meta_data_on_disk *buffer = b; 299962306a36Sopenharmony_ci sector_t sector; 300062306a36Sopenharmony_ci int i; 300162306a36Sopenharmony_ci 300262306a36Sopenharmony_ci memset(buffer, 0, sizeof(*buffer)); 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci buffer->la_size_sect = cpu_to_be64(get_capacity(device->vdisk)); 300562306a36Sopenharmony_ci for (i = UI_CURRENT; i < UI_SIZE; i++) 300662306a36Sopenharmony_ci buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]); 300762306a36Sopenharmony_ci buffer->flags = cpu_to_be32(device->ldev->md.flags); 300862306a36Sopenharmony_ci buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci buffer->md_size_sect = cpu_to_be32(device->ldev->md.md_size_sect); 301162306a36Sopenharmony_ci buffer->al_offset = cpu_to_be32(device->ldev->md.al_offset); 301262306a36Sopenharmony_ci buffer->al_nr_extents = cpu_to_be32(device->act_log->nr_elements); 301362306a36Sopenharmony_ci buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); 301462306a36Sopenharmony_ci buffer->device_uuid = cpu_to_be64(device->ldev->md.device_uuid); 301562306a36Sopenharmony_ci 301662306a36Sopenharmony_ci buffer->bm_offset = cpu_to_be32(device->ldev->md.bm_offset); 301762306a36Sopenharmony_ci buffer->la_peer_max_bio_size = cpu_to_be32(device->peer_max_bio_size); 301862306a36Sopenharmony_ci 301962306a36Sopenharmony_ci buffer->al_stripes = cpu_to_be32(device->ldev->md.al_stripes); 302062306a36Sopenharmony_ci buffer->al_stripe_size_4k = cpu_to_be32(device->ldev->md.al_stripe_size_4k); 302162306a36Sopenharmony_ci 302262306a36Sopenharmony_ci D_ASSERT(device, drbd_md_ss(device->ldev) == device->ldev->md.md_offset); 302362306a36Sopenharmony_ci sector = device->ldev->md.md_offset; 302462306a36Sopenharmony_ci 302562306a36Sopenharmony_ci if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) { 302662306a36Sopenharmony_ci /* this was a try anyways ... */ 302762306a36Sopenharmony_ci drbd_err(device, "meta data update failed!\n"); 302862306a36Sopenharmony_ci drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); 302962306a36Sopenharmony_ci } 303062306a36Sopenharmony_ci} 303162306a36Sopenharmony_ci 303262306a36Sopenharmony_ci/** 303362306a36Sopenharmony_ci * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set 303462306a36Sopenharmony_ci * @device: DRBD device. 303562306a36Sopenharmony_ci */ 303662306a36Sopenharmony_civoid drbd_md_sync(struct drbd_device *device) 303762306a36Sopenharmony_ci{ 303862306a36Sopenharmony_ci struct meta_data_on_disk *buffer; 303962306a36Sopenharmony_ci 304062306a36Sopenharmony_ci /* Don't accidentally change the DRBD meta data layout. */ 304162306a36Sopenharmony_ci BUILD_BUG_ON(UI_SIZE != 4); 304262306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); 304362306a36Sopenharmony_ci 304462306a36Sopenharmony_ci del_timer(&device->md_sync_timer); 304562306a36Sopenharmony_ci /* timer may be rearmed by drbd_md_mark_dirty() now. */ 304662306a36Sopenharmony_ci if (!test_and_clear_bit(MD_DIRTY, &device->flags)) 304762306a36Sopenharmony_ci return; 304862306a36Sopenharmony_ci 304962306a36Sopenharmony_ci /* We use here D_FAILED and not D_ATTACHING because we try to write 305062306a36Sopenharmony_ci * metadata even if we detach due to a disk failure! */ 305162306a36Sopenharmony_ci if (!get_ldev_if_state(device, D_FAILED)) 305262306a36Sopenharmony_ci return; 305362306a36Sopenharmony_ci 305462306a36Sopenharmony_ci buffer = drbd_md_get_buffer(device, __func__); 305562306a36Sopenharmony_ci if (!buffer) 305662306a36Sopenharmony_ci goto out; 305762306a36Sopenharmony_ci 305862306a36Sopenharmony_ci drbd_md_write(device, buffer); 305962306a36Sopenharmony_ci 306062306a36Sopenharmony_ci /* Update device->ldev->md.la_size_sect, 306162306a36Sopenharmony_ci * since we updated it on metadata. */ 306262306a36Sopenharmony_ci device->ldev->md.la_size_sect = get_capacity(device->vdisk); 306362306a36Sopenharmony_ci 306462306a36Sopenharmony_ci drbd_md_put_buffer(device); 306562306a36Sopenharmony_ciout: 306662306a36Sopenharmony_ci put_ldev(device); 306762306a36Sopenharmony_ci} 306862306a36Sopenharmony_ci 306962306a36Sopenharmony_cistatic int check_activity_log_stripe_size(struct drbd_device *device, 307062306a36Sopenharmony_ci struct meta_data_on_disk *on_disk, 307162306a36Sopenharmony_ci struct drbd_md *in_core) 307262306a36Sopenharmony_ci{ 307362306a36Sopenharmony_ci u32 al_stripes = be32_to_cpu(on_disk->al_stripes); 307462306a36Sopenharmony_ci u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k); 307562306a36Sopenharmony_ci u64 al_size_4k; 307662306a36Sopenharmony_ci 307762306a36Sopenharmony_ci /* both not set: default to old fixed size activity log */ 307862306a36Sopenharmony_ci if (al_stripes == 0 && al_stripe_size_4k == 0) { 307962306a36Sopenharmony_ci al_stripes = 1; 308062306a36Sopenharmony_ci al_stripe_size_4k = MD_32kB_SECT/8; 308162306a36Sopenharmony_ci } 308262306a36Sopenharmony_ci 308362306a36Sopenharmony_ci /* some paranoia plausibility checks */ 308462306a36Sopenharmony_ci 308562306a36Sopenharmony_ci /* we need both values to be set */ 308662306a36Sopenharmony_ci if (al_stripes == 0 || al_stripe_size_4k == 0) 308762306a36Sopenharmony_ci goto err; 308862306a36Sopenharmony_ci 308962306a36Sopenharmony_ci al_size_4k = (u64)al_stripes * al_stripe_size_4k; 309062306a36Sopenharmony_ci 309162306a36Sopenharmony_ci /* Upper limit of activity log area, to avoid potential overflow 309262306a36Sopenharmony_ci * problems in al_tr_number_to_on_disk_sector(). As right now, more 309362306a36Sopenharmony_ci * than 72 * 4k blocks total only increases the amount of history, 309462306a36Sopenharmony_ci * limiting this arbitrarily to 16 GB is not a real limitation ;-) */ 309562306a36Sopenharmony_ci if (al_size_4k > (16 * 1024 * 1024/4)) 309662306a36Sopenharmony_ci goto err; 309762306a36Sopenharmony_ci 309862306a36Sopenharmony_ci /* Lower limit: we need at least 8 transaction slots (32kB) 309962306a36Sopenharmony_ci * to not break existing setups */ 310062306a36Sopenharmony_ci if (al_size_4k < MD_32kB_SECT/8) 310162306a36Sopenharmony_ci goto err; 310262306a36Sopenharmony_ci 310362306a36Sopenharmony_ci in_core->al_stripe_size_4k = al_stripe_size_4k; 310462306a36Sopenharmony_ci in_core->al_stripes = al_stripes; 310562306a36Sopenharmony_ci in_core->al_size_4k = al_size_4k; 310662306a36Sopenharmony_ci 310762306a36Sopenharmony_ci return 0; 310862306a36Sopenharmony_cierr: 310962306a36Sopenharmony_ci drbd_err(device, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n", 311062306a36Sopenharmony_ci al_stripes, al_stripe_size_4k); 311162306a36Sopenharmony_ci return -EINVAL; 311262306a36Sopenharmony_ci} 311362306a36Sopenharmony_ci 311462306a36Sopenharmony_cistatic int check_offsets_and_sizes(struct drbd_device *device, struct drbd_backing_dev *bdev) 311562306a36Sopenharmony_ci{ 311662306a36Sopenharmony_ci sector_t capacity = drbd_get_capacity(bdev->md_bdev); 311762306a36Sopenharmony_ci struct drbd_md *in_core = &bdev->md; 311862306a36Sopenharmony_ci s32 on_disk_al_sect; 311962306a36Sopenharmony_ci s32 on_disk_bm_sect; 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci /* The on-disk size of the activity log, calculated from offsets, and 312262306a36Sopenharmony_ci * the size of the activity log calculated from the stripe settings, 312362306a36Sopenharmony_ci * should match. 312462306a36Sopenharmony_ci * Though we could relax this a bit: it is ok, if the striped activity log 312562306a36Sopenharmony_ci * fits in the available on-disk activity log size. 312662306a36Sopenharmony_ci * Right now, that would break how resize is implemented. 312762306a36Sopenharmony_ci * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware 312862306a36Sopenharmony_ci * of possible unused padding space in the on disk layout. */ 312962306a36Sopenharmony_ci if (in_core->al_offset < 0) { 313062306a36Sopenharmony_ci if (in_core->bm_offset > in_core->al_offset) 313162306a36Sopenharmony_ci goto err; 313262306a36Sopenharmony_ci on_disk_al_sect = -in_core->al_offset; 313362306a36Sopenharmony_ci on_disk_bm_sect = in_core->al_offset - in_core->bm_offset; 313462306a36Sopenharmony_ci } else { 313562306a36Sopenharmony_ci if (in_core->al_offset != MD_4kB_SECT) 313662306a36Sopenharmony_ci goto err; 313762306a36Sopenharmony_ci if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT) 313862306a36Sopenharmony_ci goto err; 313962306a36Sopenharmony_ci 314062306a36Sopenharmony_ci on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT; 314162306a36Sopenharmony_ci on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset; 314262306a36Sopenharmony_ci } 314362306a36Sopenharmony_ci 314462306a36Sopenharmony_ci /* old fixed size meta data is exactly that: fixed. */ 314562306a36Sopenharmony_ci if (in_core->meta_dev_idx >= 0) { 314662306a36Sopenharmony_ci if (in_core->md_size_sect != MD_128MB_SECT 314762306a36Sopenharmony_ci || in_core->al_offset != MD_4kB_SECT 314862306a36Sopenharmony_ci || in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT 314962306a36Sopenharmony_ci || in_core->al_stripes != 1 315062306a36Sopenharmony_ci || in_core->al_stripe_size_4k != MD_32kB_SECT/8) 315162306a36Sopenharmony_ci goto err; 315262306a36Sopenharmony_ci } 315362306a36Sopenharmony_ci 315462306a36Sopenharmony_ci if (capacity < in_core->md_size_sect) 315562306a36Sopenharmony_ci goto err; 315662306a36Sopenharmony_ci if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev)) 315762306a36Sopenharmony_ci goto err; 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci /* should be aligned, and at least 32k */ 316062306a36Sopenharmony_ci if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT)) 316162306a36Sopenharmony_ci goto err; 316262306a36Sopenharmony_ci 316362306a36Sopenharmony_ci /* should fit (for now: exactly) into the available on-disk space; 316462306a36Sopenharmony_ci * overflow prevention is in check_activity_log_stripe_size() above. */ 316562306a36Sopenharmony_ci if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT) 316662306a36Sopenharmony_ci goto err; 316762306a36Sopenharmony_ci 316862306a36Sopenharmony_ci /* again, should be aligned */ 316962306a36Sopenharmony_ci if (in_core->bm_offset & 7) 317062306a36Sopenharmony_ci goto err; 317162306a36Sopenharmony_ci 317262306a36Sopenharmony_ci /* FIXME check for device grow with flex external meta data? */ 317362306a36Sopenharmony_ci 317462306a36Sopenharmony_ci /* can the available bitmap space cover the last agreed device size? */ 317562306a36Sopenharmony_ci if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512) 317662306a36Sopenharmony_ci goto err; 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci return 0; 317962306a36Sopenharmony_ci 318062306a36Sopenharmony_cierr: 318162306a36Sopenharmony_ci drbd_err(device, "meta data offsets don't make sense: idx=%d " 318262306a36Sopenharmony_ci "al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, " 318362306a36Sopenharmony_ci "md_size_sect=%u, la_size=%llu, md_capacity=%llu\n", 318462306a36Sopenharmony_ci in_core->meta_dev_idx, 318562306a36Sopenharmony_ci in_core->al_stripes, in_core->al_stripe_size_4k, 318662306a36Sopenharmony_ci in_core->al_offset, in_core->bm_offset, in_core->md_size_sect, 318762306a36Sopenharmony_ci (unsigned long long)in_core->la_size_sect, 318862306a36Sopenharmony_ci (unsigned long long)capacity); 318962306a36Sopenharmony_ci 319062306a36Sopenharmony_ci return -EINVAL; 319162306a36Sopenharmony_ci} 319262306a36Sopenharmony_ci 319362306a36Sopenharmony_ci 319462306a36Sopenharmony_ci/** 319562306a36Sopenharmony_ci * drbd_md_read() - Reads in the meta data super block 319662306a36Sopenharmony_ci * @device: DRBD device. 319762306a36Sopenharmony_ci * @bdev: Device from which the meta data should be read in. 319862306a36Sopenharmony_ci * 319962306a36Sopenharmony_ci * Return NO_ERROR on success, and an enum drbd_ret_code in case 320062306a36Sopenharmony_ci * something goes wrong. 320162306a36Sopenharmony_ci * 320262306a36Sopenharmony_ci * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS, 320362306a36Sopenharmony_ci * even before @bdev is assigned to @device->ldev. 320462306a36Sopenharmony_ci */ 320562306a36Sopenharmony_ciint drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) 320662306a36Sopenharmony_ci{ 320762306a36Sopenharmony_ci struct meta_data_on_disk *buffer; 320862306a36Sopenharmony_ci u32 magic, flags; 320962306a36Sopenharmony_ci int i, rv = NO_ERROR; 321062306a36Sopenharmony_ci 321162306a36Sopenharmony_ci if (device->state.disk != D_DISKLESS) 321262306a36Sopenharmony_ci return ERR_DISK_CONFIGURED; 321362306a36Sopenharmony_ci 321462306a36Sopenharmony_ci buffer = drbd_md_get_buffer(device, __func__); 321562306a36Sopenharmony_ci if (!buffer) 321662306a36Sopenharmony_ci return ERR_NOMEM; 321762306a36Sopenharmony_ci 321862306a36Sopenharmony_ci /* First, figure out where our meta data superblock is located, 321962306a36Sopenharmony_ci * and read it. */ 322062306a36Sopenharmony_ci bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; 322162306a36Sopenharmony_ci bdev->md.md_offset = drbd_md_ss(bdev); 322262306a36Sopenharmony_ci /* Even for (flexible or indexed) external meta data, 322362306a36Sopenharmony_ci * initially restrict us to the 4k superblock for now. 322462306a36Sopenharmony_ci * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */ 322562306a36Sopenharmony_ci bdev->md.md_size_sect = 8; 322662306a36Sopenharmony_ci 322762306a36Sopenharmony_ci if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, 322862306a36Sopenharmony_ci REQ_OP_READ)) { 322962306a36Sopenharmony_ci /* NOTE: can't do normal error processing here as this is 323062306a36Sopenharmony_ci called BEFORE disk is attached */ 323162306a36Sopenharmony_ci drbd_err(device, "Error while reading metadata.\n"); 323262306a36Sopenharmony_ci rv = ERR_IO_MD_DISK; 323362306a36Sopenharmony_ci goto err; 323462306a36Sopenharmony_ci } 323562306a36Sopenharmony_ci 323662306a36Sopenharmony_ci magic = be32_to_cpu(buffer->magic); 323762306a36Sopenharmony_ci flags = be32_to_cpu(buffer->flags); 323862306a36Sopenharmony_ci if (magic == DRBD_MD_MAGIC_84_UNCLEAN || 323962306a36Sopenharmony_ci (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { 324062306a36Sopenharmony_ci /* btw: that's Activity Log clean, not "all" clean. */ 324162306a36Sopenharmony_ci drbd_err(device, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); 324262306a36Sopenharmony_ci rv = ERR_MD_UNCLEAN; 324362306a36Sopenharmony_ci goto err; 324462306a36Sopenharmony_ci } 324562306a36Sopenharmony_ci 324662306a36Sopenharmony_ci rv = ERR_MD_INVALID; 324762306a36Sopenharmony_ci if (magic != DRBD_MD_MAGIC_08) { 324862306a36Sopenharmony_ci if (magic == DRBD_MD_MAGIC_07) 324962306a36Sopenharmony_ci drbd_err(device, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); 325062306a36Sopenharmony_ci else 325162306a36Sopenharmony_ci drbd_err(device, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); 325262306a36Sopenharmony_ci goto err; 325362306a36Sopenharmony_ci } 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ci if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { 325662306a36Sopenharmony_ci drbd_err(device, "unexpected bm_bytes_per_bit: %u (expected %u)\n", 325762306a36Sopenharmony_ci be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); 325862306a36Sopenharmony_ci goto err; 325962306a36Sopenharmony_ci } 326062306a36Sopenharmony_ci 326162306a36Sopenharmony_ci 326262306a36Sopenharmony_ci /* convert to in_core endian */ 326362306a36Sopenharmony_ci bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect); 326462306a36Sopenharmony_ci for (i = UI_CURRENT; i < UI_SIZE; i++) 326562306a36Sopenharmony_ci bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); 326662306a36Sopenharmony_ci bdev->md.flags = be32_to_cpu(buffer->flags); 326762306a36Sopenharmony_ci bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); 326862306a36Sopenharmony_ci 326962306a36Sopenharmony_ci bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect); 327062306a36Sopenharmony_ci bdev->md.al_offset = be32_to_cpu(buffer->al_offset); 327162306a36Sopenharmony_ci bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset); 327262306a36Sopenharmony_ci 327362306a36Sopenharmony_ci if (check_activity_log_stripe_size(device, buffer, &bdev->md)) 327462306a36Sopenharmony_ci goto err; 327562306a36Sopenharmony_ci if (check_offsets_and_sizes(device, bdev)) 327662306a36Sopenharmony_ci goto err; 327762306a36Sopenharmony_ci 327862306a36Sopenharmony_ci if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { 327962306a36Sopenharmony_ci drbd_err(device, "unexpected bm_offset: %d (expected %d)\n", 328062306a36Sopenharmony_ci be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); 328162306a36Sopenharmony_ci goto err; 328262306a36Sopenharmony_ci } 328362306a36Sopenharmony_ci if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { 328462306a36Sopenharmony_ci drbd_err(device, "unexpected md_size: %u (expected %u)\n", 328562306a36Sopenharmony_ci be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); 328662306a36Sopenharmony_ci goto err; 328762306a36Sopenharmony_ci } 328862306a36Sopenharmony_ci 328962306a36Sopenharmony_ci rv = NO_ERROR; 329062306a36Sopenharmony_ci 329162306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 329262306a36Sopenharmony_ci if (device->state.conn < C_CONNECTED) { 329362306a36Sopenharmony_ci unsigned int peer; 329462306a36Sopenharmony_ci peer = be32_to_cpu(buffer->la_peer_max_bio_size); 329562306a36Sopenharmony_ci peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE); 329662306a36Sopenharmony_ci device->peer_max_bio_size = peer; 329762306a36Sopenharmony_ci } 329862306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 329962306a36Sopenharmony_ci 330062306a36Sopenharmony_ci err: 330162306a36Sopenharmony_ci drbd_md_put_buffer(device); 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_ci return rv; 330462306a36Sopenharmony_ci} 330562306a36Sopenharmony_ci 330662306a36Sopenharmony_ci/** 330762306a36Sopenharmony_ci * drbd_md_mark_dirty() - Mark meta data super block as dirty 330862306a36Sopenharmony_ci * @device: DRBD device. 330962306a36Sopenharmony_ci * 331062306a36Sopenharmony_ci * Call this function if you change anything that should be written to 331162306a36Sopenharmony_ci * the meta-data super block. This function sets MD_DIRTY, and starts a 331262306a36Sopenharmony_ci * timer that ensures that within five seconds you have to call drbd_md_sync(). 331362306a36Sopenharmony_ci */ 331462306a36Sopenharmony_civoid drbd_md_mark_dirty(struct drbd_device *device) 331562306a36Sopenharmony_ci{ 331662306a36Sopenharmony_ci if (!test_and_set_bit(MD_DIRTY, &device->flags)) 331762306a36Sopenharmony_ci mod_timer(&device->md_sync_timer, jiffies + 5*HZ); 331862306a36Sopenharmony_ci} 331962306a36Sopenharmony_ci 332062306a36Sopenharmony_civoid drbd_uuid_move_history(struct drbd_device *device) __must_hold(local) 332162306a36Sopenharmony_ci{ 332262306a36Sopenharmony_ci int i; 332362306a36Sopenharmony_ci 332462306a36Sopenharmony_ci for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) 332562306a36Sopenharmony_ci device->ldev->md.uuid[i+1] = device->ldev->md.uuid[i]; 332662306a36Sopenharmony_ci} 332762306a36Sopenharmony_ci 332862306a36Sopenharmony_civoid __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) 332962306a36Sopenharmony_ci{ 333062306a36Sopenharmony_ci if (idx == UI_CURRENT) { 333162306a36Sopenharmony_ci if (device->state.role == R_PRIMARY) 333262306a36Sopenharmony_ci val |= 1; 333362306a36Sopenharmony_ci else 333462306a36Sopenharmony_ci val &= ~((u64)1); 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci drbd_set_ed_uuid(device, val); 333762306a36Sopenharmony_ci } 333862306a36Sopenharmony_ci 333962306a36Sopenharmony_ci device->ldev->md.uuid[idx] = val; 334062306a36Sopenharmony_ci drbd_md_mark_dirty(device); 334162306a36Sopenharmony_ci} 334262306a36Sopenharmony_ci 334362306a36Sopenharmony_civoid _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) 334462306a36Sopenharmony_ci{ 334562306a36Sopenharmony_ci unsigned long flags; 334662306a36Sopenharmony_ci spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); 334762306a36Sopenharmony_ci __drbd_uuid_set(device, idx, val); 334862306a36Sopenharmony_ci spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); 334962306a36Sopenharmony_ci} 335062306a36Sopenharmony_ci 335162306a36Sopenharmony_civoid drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) 335262306a36Sopenharmony_ci{ 335362306a36Sopenharmony_ci unsigned long flags; 335462306a36Sopenharmony_ci spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); 335562306a36Sopenharmony_ci if (device->ldev->md.uuid[idx]) { 335662306a36Sopenharmony_ci drbd_uuid_move_history(device); 335762306a36Sopenharmony_ci device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[idx]; 335862306a36Sopenharmony_ci } 335962306a36Sopenharmony_ci __drbd_uuid_set(device, idx, val); 336062306a36Sopenharmony_ci spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); 336162306a36Sopenharmony_ci} 336262306a36Sopenharmony_ci 336362306a36Sopenharmony_ci/** 336462306a36Sopenharmony_ci * drbd_uuid_new_current() - Creates a new current UUID 336562306a36Sopenharmony_ci * @device: DRBD device. 336662306a36Sopenharmony_ci * 336762306a36Sopenharmony_ci * Creates a new current UUID, and rotates the old current UUID into 336862306a36Sopenharmony_ci * the bitmap slot. Causes an incremental resync upon next connect. 336962306a36Sopenharmony_ci */ 337062306a36Sopenharmony_civoid drbd_uuid_new_current(struct drbd_device *device) __must_hold(local) 337162306a36Sopenharmony_ci{ 337262306a36Sopenharmony_ci u64 val; 337362306a36Sopenharmony_ci unsigned long long bm_uuid; 337462306a36Sopenharmony_ci 337562306a36Sopenharmony_ci get_random_bytes(&val, sizeof(u64)); 337662306a36Sopenharmony_ci 337762306a36Sopenharmony_ci spin_lock_irq(&device->ldev->md.uuid_lock); 337862306a36Sopenharmony_ci bm_uuid = device->ldev->md.uuid[UI_BITMAP]; 337962306a36Sopenharmony_ci 338062306a36Sopenharmony_ci if (bm_uuid) 338162306a36Sopenharmony_ci drbd_warn(device, "bm UUID was already set: %llX\n", bm_uuid); 338262306a36Sopenharmony_ci 338362306a36Sopenharmony_ci device->ldev->md.uuid[UI_BITMAP] = device->ldev->md.uuid[UI_CURRENT]; 338462306a36Sopenharmony_ci __drbd_uuid_set(device, UI_CURRENT, val); 338562306a36Sopenharmony_ci spin_unlock_irq(&device->ldev->md.uuid_lock); 338662306a36Sopenharmony_ci 338762306a36Sopenharmony_ci drbd_print_uuids(device, "new current UUID"); 338862306a36Sopenharmony_ci /* get it to stable storage _now_ */ 338962306a36Sopenharmony_ci drbd_md_sync(device); 339062306a36Sopenharmony_ci} 339162306a36Sopenharmony_ci 339262306a36Sopenharmony_civoid drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) 339362306a36Sopenharmony_ci{ 339462306a36Sopenharmony_ci unsigned long flags; 339562306a36Sopenharmony_ci if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) 339662306a36Sopenharmony_ci return; 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); 339962306a36Sopenharmony_ci if (val == 0) { 340062306a36Sopenharmony_ci drbd_uuid_move_history(device); 340162306a36Sopenharmony_ci device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; 340262306a36Sopenharmony_ci device->ldev->md.uuid[UI_BITMAP] = 0; 340362306a36Sopenharmony_ci } else { 340462306a36Sopenharmony_ci unsigned long long bm_uuid = device->ldev->md.uuid[UI_BITMAP]; 340562306a36Sopenharmony_ci if (bm_uuid) 340662306a36Sopenharmony_ci drbd_warn(device, "bm UUID was already set: %llX\n", bm_uuid); 340762306a36Sopenharmony_ci 340862306a36Sopenharmony_ci device->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); 340962306a36Sopenharmony_ci } 341062306a36Sopenharmony_ci spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); 341162306a36Sopenharmony_ci 341262306a36Sopenharmony_ci drbd_md_mark_dirty(device); 341362306a36Sopenharmony_ci} 341462306a36Sopenharmony_ci 341562306a36Sopenharmony_ci/** 341662306a36Sopenharmony_ci * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() 341762306a36Sopenharmony_ci * @device: DRBD device. 341862306a36Sopenharmony_ci * 341962306a36Sopenharmony_ci * Sets all bits in the bitmap and writes the whole bitmap to stable storage. 342062306a36Sopenharmony_ci */ 342162306a36Sopenharmony_ciint drbd_bmio_set_n_write(struct drbd_device *device, 342262306a36Sopenharmony_ci struct drbd_peer_device *peer_device) __must_hold(local) 342362306a36Sopenharmony_ci 342462306a36Sopenharmony_ci{ 342562306a36Sopenharmony_ci int rv = -EIO; 342662306a36Sopenharmony_ci 342762306a36Sopenharmony_ci drbd_md_set_flag(device, MDF_FULL_SYNC); 342862306a36Sopenharmony_ci drbd_md_sync(device); 342962306a36Sopenharmony_ci drbd_bm_set_all(device); 343062306a36Sopenharmony_ci 343162306a36Sopenharmony_ci rv = drbd_bm_write(device, peer_device); 343262306a36Sopenharmony_ci 343362306a36Sopenharmony_ci if (!rv) { 343462306a36Sopenharmony_ci drbd_md_clear_flag(device, MDF_FULL_SYNC); 343562306a36Sopenharmony_ci drbd_md_sync(device); 343662306a36Sopenharmony_ci } 343762306a36Sopenharmony_ci 343862306a36Sopenharmony_ci return rv; 343962306a36Sopenharmony_ci} 344062306a36Sopenharmony_ci 344162306a36Sopenharmony_ci/** 344262306a36Sopenharmony_ci * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() 344362306a36Sopenharmony_ci * @device: DRBD device. 344462306a36Sopenharmony_ci * 344562306a36Sopenharmony_ci * Clears all bits in the bitmap and writes the whole bitmap to stable storage. 344662306a36Sopenharmony_ci */ 344762306a36Sopenharmony_ciint drbd_bmio_clear_n_write(struct drbd_device *device, 344862306a36Sopenharmony_ci struct drbd_peer_device *peer_device) __must_hold(local) 344962306a36Sopenharmony_ci 345062306a36Sopenharmony_ci{ 345162306a36Sopenharmony_ci drbd_resume_al(device); 345262306a36Sopenharmony_ci drbd_bm_clear_all(device); 345362306a36Sopenharmony_ci return drbd_bm_write(device, peer_device); 345462306a36Sopenharmony_ci} 345562306a36Sopenharmony_ci 345662306a36Sopenharmony_cistatic int w_bitmap_io(struct drbd_work *w, int unused) 345762306a36Sopenharmony_ci{ 345862306a36Sopenharmony_ci struct drbd_device *device = 345962306a36Sopenharmony_ci container_of(w, struct drbd_device, bm_io_work.w); 346062306a36Sopenharmony_ci struct bm_io_work *work = &device->bm_io_work; 346162306a36Sopenharmony_ci int rv = -EIO; 346262306a36Sopenharmony_ci 346362306a36Sopenharmony_ci if (work->flags != BM_LOCKED_CHANGE_ALLOWED) { 346462306a36Sopenharmony_ci int cnt = atomic_read(&device->ap_bio_cnt); 346562306a36Sopenharmony_ci if (cnt) 346662306a36Sopenharmony_ci drbd_err(device, "FIXME: ap_bio_cnt %d, expected 0; queued for '%s'\n", 346762306a36Sopenharmony_ci cnt, work->why); 346862306a36Sopenharmony_ci } 346962306a36Sopenharmony_ci 347062306a36Sopenharmony_ci if (get_ldev(device)) { 347162306a36Sopenharmony_ci drbd_bm_lock(device, work->why, work->flags); 347262306a36Sopenharmony_ci rv = work->io_fn(device, work->peer_device); 347362306a36Sopenharmony_ci drbd_bm_unlock(device); 347462306a36Sopenharmony_ci put_ldev(device); 347562306a36Sopenharmony_ci } 347662306a36Sopenharmony_ci 347762306a36Sopenharmony_ci clear_bit_unlock(BITMAP_IO, &device->flags); 347862306a36Sopenharmony_ci wake_up(&device->misc_wait); 347962306a36Sopenharmony_ci 348062306a36Sopenharmony_ci if (work->done) 348162306a36Sopenharmony_ci work->done(device, rv); 348262306a36Sopenharmony_ci 348362306a36Sopenharmony_ci clear_bit(BITMAP_IO_QUEUED, &device->flags); 348462306a36Sopenharmony_ci work->why = NULL; 348562306a36Sopenharmony_ci work->flags = 0; 348662306a36Sopenharmony_ci 348762306a36Sopenharmony_ci return 0; 348862306a36Sopenharmony_ci} 348962306a36Sopenharmony_ci 349062306a36Sopenharmony_ci/** 349162306a36Sopenharmony_ci * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap 349262306a36Sopenharmony_ci * @device: DRBD device. 349362306a36Sopenharmony_ci * @io_fn: IO callback to be called when bitmap IO is possible 349462306a36Sopenharmony_ci * @done: callback to be called after the bitmap IO was performed 349562306a36Sopenharmony_ci * @why: Descriptive text of the reason for doing the IO 349662306a36Sopenharmony_ci * @flags: Bitmap flags 349762306a36Sopenharmony_ci * 349862306a36Sopenharmony_ci * While IO on the bitmap happens we freeze application IO thus we ensure 349962306a36Sopenharmony_ci * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be 350062306a36Sopenharmony_ci * called from worker context. It MUST NOT be used while a previous such 350162306a36Sopenharmony_ci * work is still pending! 350262306a36Sopenharmony_ci * 350362306a36Sopenharmony_ci * Its worker function encloses the call of io_fn() by get_ldev() and 350462306a36Sopenharmony_ci * put_ldev(). 350562306a36Sopenharmony_ci */ 350662306a36Sopenharmony_civoid drbd_queue_bitmap_io(struct drbd_device *device, 350762306a36Sopenharmony_ci int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), 350862306a36Sopenharmony_ci void (*done)(struct drbd_device *, int), 350962306a36Sopenharmony_ci char *why, enum bm_flag flags, 351062306a36Sopenharmony_ci struct drbd_peer_device *peer_device) 351162306a36Sopenharmony_ci{ 351262306a36Sopenharmony_ci D_ASSERT(device, current == peer_device->connection->worker.task); 351362306a36Sopenharmony_ci 351462306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags)); 351562306a36Sopenharmony_ci D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags)); 351662306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->bm_io_work.w.list)); 351762306a36Sopenharmony_ci if (device->bm_io_work.why) 351862306a36Sopenharmony_ci drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n", 351962306a36Sopenharmony_ci why, device->bm_io_work.why); 352062306a36Sopenharmony_ci 352162306a36Sopenharmony_ci device->bm_io_work.peer_device = peer_device; 352262306a36Sopenharmony_ci device->bm_io_work.io_fn = io_fn; 352362306a36Sopenharmony_ci device->bm_io_work.done = done; 352462306a36Sopenharmony_ci device->bm_io_work.why = why; 352562306a36Sopenharmony_ci device->bm_io_work.flags = flags; 352662306a36Sopenharmony_ci 352762306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 352862306a36Sopenharmony_ci set_bit(BITMAP_IO, &device->flags); 352962306a36Sopenharmony_ci /* don't wait for pending application IO if the caller indicates that 353062306a36Sopenharmony_ci * application IO does not conflict anyways. */ 353162306a36Sopenharmony_ci if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) { 353262306a36Sopenharmony_ci if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) 353362306a36Sopenharmony_ci drbd_queue_work(&peer_device->connection->sender_work, 353462306a36Sopenharmony_ci &device->bm_io_work.w); 353562306a36Sopenharmony_ci } 353662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 353762306a36Sopenharmony_ci} 353862306a36Sopenharmony_ci 353962306a36Sopenharmony_ci/** 354062306a36Sopenharmony_ci * drbd_bitmap_io() - Does an IO operation on the whole bitmap 354162306a36Sopenharmony_ci * @device: DRBD device. 354262306a36Sopenharmony_ci * @io_fn: IO callback to be called when bitmap IO is possible 354362306a36Sopenharmony_ci * @why: Descriptive text of the reason for doing the IO 354462306a36Sopenharmony_ci * @flags: Bitmap flags 354562306a36Sopenharmony_ci * 354662306a36Sopenharmony_ci * freezes application IO while that the actual IO operations runs. This 354762306a36Sopenharmony_ci * functions MAY NOT be called from worker context. 354862306a36Sopenharmony_ci */ 354962306a36Sopenharmony_ciint drbd_bitmap_io(struct drbd_device *device, 355062306a36Sopenharmony_ci int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), 355162306a36Sopenharmony_ci char *why, enum bm_flag flags, 355262306a36Sopenharmony_ci struct drbd_peer_device *peer_device) 355362306a36Sopenharmony_ci{ 355462306a36Sopenharmony_ci /* Only suspend io, if some operation is supposed to be locked out */ 355562306a36Sopenharmony_ci const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST); 355662306a36Sopenharmony_ci int rv; 355762306a36Sopenharmony_ci 355862306a36Sopenharmony_ci D_ASSERT(device, current != first_peer_device(device)->connection->worker.task); 355962306a36Sopenharmony_ci 356062306a36Sopenharmony_ci if (do_suspend_io) 356162306a36Sopenharmony_ci drbd_suspend_io(device); 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_ci drbd_bm_lock(device, why, flags); 356462306a36Sopenharmony_ci rv = io_fn(device, peer_device); 356562306a36Sopenharmony_ci drbd_bm_unlock(device); 356662306a36Sopenharmony_ci 356762306a36Sopenharmony_ci if (do_suspend_io) 356862306a36Sopenharmony_ci drbd_resume_io(device); 356962306a36Sopenharmony_ci 357062306a36Sopenharmony_ci return rv; 357162306a36Sopenharmony_ci} 357262306a36Sopenharmony_ci 357362306a36Sopenharmony_civoid drbd_md_set_flag(struct drbd_device *device, int flag) __must_hold(local) 357462306a36Sopenharmony_ci{ 357562306a36Sopenharmony_ci if ((device->ldev->md.flags & flag) != flag) { 357662306a36Sopenharmony_ci drbd_md_mark_dirty(device); 357762306a36Sopenharmony_ci device->ldev->md.flags |= flag; 357862306a36Sopenharmony_ci } 357962306a36Sopenharmony_ci} 358062306a36Sopenharmony_ci 358162306a36Sopenharmony_civoid drbd_md_clear_flag(struct drbd_device *device, int flag) __must_hold(local) 358262306a36Sopenharmony_ci{ 358362306a36Sopenharmony_ci if ((device->ldev->md.flags & flag) != 0) { 358462306a36Sopenharmony_ci drbd_md_mark_dirty(device); 358562306a36Sopenharmony_ci device->ldev->md.flags &= ~flag; 358662306a36Sopenharmony_ci } 358762306a36Sopenharmony_ci} 358862306a36Sopenharmony_ciint drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) 358962306a36Sopenharmony_ci{ 359062306a36Sopenharmony_ci return (bdev->md.flags & flag) != 0; 359162306a36Sopenharmony_ci} 359262306a36Sopenharmony_ci 359362306a36Sopenharmony_cistatic void md_sync_timer_fn(struct timer_list *t) 359462306a36Sopenharmony_ci{ 359562306a36Sopenharmony_ci struct drbd_device *device = from_timer(device, t, md_sync_timer); 359662306a36Sopenharmony_ci drbd_device_post_work(device, MD_SYNC); 359762306a36Sopenharmony_ci} 359862306a36Sopenharmony_ci 359962306a36Sopenharmony_ciconst char *cmdname(enum drbd_packet cmd) 360062306a36Sopenharmony_ci{ 360162306a36Sopenharmony_ci /* THINK may need to become several global tables 360262306a36Sopenharmony_ci * when we want to support more than 360362306a36Sopenharmony_ci * one PRO_VERSION */ 360462306a36Sopenharmony_ci static const char *cmdnames[] = { 360562306a36Sopenharmony_ci 360662306a36Sopenharmony_ci [P_DATA] = "Data", 360762306a36Sopenharmony_ci [P_DATA_REPLY] = "DataReply", 360862306a36Sopenharmony_ci [P_RS_DATA_REPLY] = "RSDataReply", 360962306a36Sopenharmony_ci [P_BARRIER] = "Barrier", 361062306a36Sopenharmony_ci [P_BITMAP] = "ReportBitMap", 361162306a36Sopenharmony_ci [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", 361262306a36Sopenharmony_ci [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", 361362306a36Sopenharmony_ci [P_UNPLUG_REMOTE] = "UnplugRemote", 361462306a36Sopenharmony_ci [P_DATA_REQUEST] = "DataRequest", 361562306a36Sopenharmony_ci [P_RS_DATA_REQUEST] = "RSDataRequest", 361662306a36Sopenharmony_ci [P_SYNC_PARAM] = "SyncParam", 361762306a36Sopenharmony_ci [P_PROTOCOL] = "ReportProtocol", 361862306a36Sopenharmony_ci [P_UUIDS] = "ReportUUIDs", 361962306a36Sopenharmony_ci [P_SIZES] = "ReportSizes", 362062306a36Sopenharmony_ci [P_STATE] = "ReportState", 362162306a36Sopenharmony_ci [P_SYNC_UUID] = "ReportSyncUUID", 362262306a36Sopenharmony_ci [P_AUTH_CHALLENGE] = "AuthChallenge", 362362306a36Sopenharmony_ci [P_AUTH_RESPONSE] = "AuthResponse", 362462306a36Sopenharmony_ci [P_STATE_CHG_REQ] = "StateChgRequest", 362562306a36Sopenharmony_ci [P_PING] = "Ping", 362662306a36Sopenharmony_ci [P_PING_ACK] = "PingAck", 362762306a36Sopenharmony_ci [P_RECV_ACK] = "RecvAck", 362862306a36Sopenharmony_ci [P_WRITE_ACK] = "WriteAck", 362962306a36Sopenharmony_ci [P_RS_WRITE_ACK] = "RSWriteAck", 363062306a36Sopenharmony_ci [P_SUPERSEDED] = "Superseded", 363162306a36Sopenharmony_ci [P_NEG_ACK] = "NegAck", 363262306a36Sopenharmony_ci [P_NEG_DREPLY] = "NegDReply", 363362306a36Sopenharmony_ci [P_NEG_RS_DREPLY] = "NegRSDReply", 363462306a36Sopenharmony_ci [P_BARRIER_ACK] = "BarrierAck", 363562306a36Sopenharmony_ci [P_STATE_CHG_REPLY] = "StateChgReply", 363662306a36Sopenharmony_ci [P_OV_REQUEST] = "OVRequest", 363762306a36Sopenharmony_ci [P_OV_REPLY] = "OVReply", 363862306a36Sopenharmony_ci [P_OV_RESULT] = "OVResult", 363962306a36Sopenharmony_ci [P_CSUM_RS_REQUEST] = "CsumRSRequest", 364062306a36Sopenharmony_ci [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", 364162306a36Sopenharmony_ci [P_SYNC_PARAM89] = "SyncParam89", 364262306a36Sopenharmony_ci [P_COMPRESSED_BITMAP] = "CBitmap", 364362306a36Sopenharmony_ci [P_DELAY_PROBE] = "DelayProbe", 364462306a36Sopenharmony_ci [P_OUT_OF_SYNC] = "OutOfSync", 364562306a36Sopenharmony_ci [P_RS_CANCEL] = "RSCancel", 364662306a36Sopenharmony_ci [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", 364762306a36Sopenharmony_ci [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", 364862306a36Sopenharmony_ci [P_PROTOCOL_UPDATE] = "protocol_update", 364962306a36Sopenharmony_ci [P_TRIM] = "Trim", 365062306a36Sopenharmony_ci [P_RS_THIN_REQ] = "rs_thin_req", 365162306a36Sopenharmony_ci [P_RS_DEALLOCATED] = "rs_deallocated", 365262306a36Sopenharmony_ci [P_WSAME] = "WriteSame", 365362306a36Sopenharmony_ci [P_ZEROES] = "Zeroes", 365462306a36Sopenharmony_ci 365562306a36Sopenharmony_ci /* enum drbd_packet, but not commands - obsoleted flags: 365662306a36Sopenharmony_ci * P_MAY_IGNORE 365762306a36Sopenharmony_ci * P_MAX_OPT_CMD 365862306a36Sopenharmony_ci */ 365962306a36Sopenharmony_ci }; 366062306a36Sopenharmony_ci 366162306a36Sopenharmony_ci /* too big for the array: 0xfffX */ 366262306a36Sopenharmony_ci if (cmd == P_INITIAL_META) 366362306a36Sopenharmony_ci return "InitialMeta"; 366462306a36Sopenharmony_ci if (cmd == P_INITIAL_DATA) 366562306a36Sopenharmony_ci return "InitialData"; 366662306a36Sopenharmony_ci if (cmd == P_CONNECTION_FEATURES) 366762306a36Sopenharmony_ci return "ConnectionFeatures"; 366862306a36Sopenharmony_ci if (cmd >= ARRAY_SIZE(cmdnames)) 366962306a36Sopenharmony_ci return "Unknown"; 367062306a36Sopenharmony_ci return cmdnames[cmd]; 367162306a36Sopenharmony_ci} 367262306a36Sopenharmony_ci 367362306a36Sopenharmony_ci/** 367462306a36Sopenharmony_ci * drbd_wait_misc - wait for a request to make progress 367562306a36Sopenharmony_ci * @device: device associated with the request 367662306a36Sopenharmony_ci * @i: the struct drbd_interval embedded in struct drbd_request or 367762306a36Sopenharmony_ci * struct drbd_peer_request 367862306a36Sopenharmony_ci */ 367962306a36Sopenharmony_ciint drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i) 368062306a36Sopenharmony_ci{ 368162306a36Sopenharmony_ci struct net_conf *nc; 368262306a36Sopenharmony_ci DEFINE_WAIT(wait); 368362306a36Sopenharmony_ci long timeout; 368462306a36Sopenharmony_ci 368562306a36Sopenharmony_ci rcu_read_lock(); 368662306a36Sopenharmony_ci nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 368762306a36Sopenharmony_ci if (!nc) { 368862306a36Sopenharmony_ci rcu_read_unlock(); 368962306a36Sopenharmony_ci return -ETIMEDOUT; 369062306a36Sopenharmony_ci } 369162306a36Sopenharmony_ci timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT; 369262306a36Sopenharmony_ci rcu_read_unlock(); 369362306a36Sopenharmony_ci 369462306a36Sopenharmony_ci /* Indicate to wake up device->misc_wait on progress. */ 369562306a36Sopenharmony_ci i->waiting = true; 369662306a36Sopenharmony_ci prepare_to_wait(&device->misc_wait, &wait, TASK_INTERRUPTIBLE); 369762306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 369862306a36Sopenharmony_ci timeout = schedule_timeout(timeout); 369962306a36Sopenharmony_ci finish_wait(&device->misc_wait, &wait); 370062306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 370162306a36Sopenharmony_ci if (!timeout || device->state.conn < C_CONNECTED) 370262306a36Sopenharmony_ci return -ETIMEDOUT; 370362306a36Sopenharmony_ci if (signal_pending(current)) 370462306a36Sopenharmony_ci return -ERESTARTSYS; 370562306a36Sopenharmony_ci return 0; 370662306a36Sopenharmony_ci} 370762306a36Sopenharmony_ci 370862306a36Sopenharmony_civoid lock_all_resources(void) 370962306a36Sopenharmony_ci{ 371062306a36Sopenharmony_ci struct drbd_resource *resource; 371162306a36Sopenharmony_ci int __maybe_unused i = 0; 371262306a36Sopenharmony_ci 371362306a36Sopenharmony_ci mutex_lock(&resources_mutex); 371462306a36Sopenharmony_ci local_irq_disable(); 371562306a36Sopenharmony_ci for_each_resource(resource, &drbd_resources) 371662306a36Sopenharmony_ci spin_lock_nested(&resource->req_lock, i++); 371762306a36Sopenharmony_ci} 371862306a36Sopenharmony_ci 371962306a36Sopenharmony_civoid unlock_all_resources(void) 372062306a36Sopenharmony_ci{ 372162306a36Sopenharmony_ci struct drbd_resource *resource; 372262306a36Sopenharmony_ci 372362306a36Sopenharmony_ci for_each_resource(resource, &drbd_resources) 372462306a36Sopenharmony_ci spin_unlock(&resource->req_lock); 372562306a36Sopenharmony_ci local_irq_enable(); 372662306a36Sopenharmony_ci mutex_unlock(&resources_mutex); 372762306a36Sopenharmony_ci} 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci#ifdef CONFIG_DRBD_FAULT_INJECTION 373062306a36Sopenharmony_ci/* Fault insertion support including random number generator shamelessly 373162306a36Sopenharmony_ci * stolen from kernel/rcutorture.c */ 373262306a36Sopenharmony_cistruct fault_random_state { 373362306a36Sopenharmony_ci unsigned long state; 373462306a36Sopenharmony_ci unsigned long count; 373562306a36Sopenharmony_ci}; 373662306a36Sopenharmony_ci 373762306a36Sopenharmony_ci#define FAULT_RANDOM_MULT 39916801 /* prime */ 373862306a36Sopenharmony_ci#define FAULT_RANDOM_ADD 479001701 /* prime */ 373962306a36Sopenharmony_ci#define FAULT_RANDOM_REFRESH 10000 374062306a36Sopenharmony_ci 374162306a36Sopenharmony_ci/* 374262306a36Sopenharmony_ci * Crude but fast random-number generator. Uses a linear congruential 374362306a36Sopenharmony_ci * generator, with occasional help from get_random_bytes(). 374462306a36Sopenharmony_ci */ 374562306a36Sopenharmony_cistatic unsigned long 374662306a36Sopenharmony_ci_drbd_fault_random(struct fault_random_state *rsp) 374762306a36Sopenharmony_ci{ 374862306a36Sopenharmony_ci long refresh; 374962306a36Sopenharmony_ci 375062306a36Sopenharmony_ci if (!rsp->count--) { 375162306a36Sopenharmony_ci get_random_bytes(&refresh, sizeof(refresh)); 375262306a36Sopenharmony_ci rsp->state += refresh; 375362306a36Sopenharmony_ci rsp->count = FAULT_RANDOM_REFRESH; 375462306a36Sopenharmony_ci } 375562306a36Sopenharmony_ci rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD; 375662306a36Sopenharmony_ci return swahw32(rsp->state); 375762306a36Sopenharmony_ci} 375862306a36Sopenharmony_ci 375962306a36Sopenharmony_cistatic char * 376062306a36Sopenharmony_ci_drbd_fault_str(unsigned int type) { 376162306a36Sopenharmony_ci static char *_faults[] = { 376262306a36Sopenharmony_ci [DRBD_FAULT_MD_WR] = "Meta-data write", 376362306a36Sopenharmony_ci [DRBD_FAULT_MD_RD] = "Meta-data read", 376462306a36Sopenharmony_ci [DRBD_FAULT_RS_WR] = "Resync write", 376562306a36Sopenharmony_ci [DRBD_FAULT_RS_RD] = "Resync read", 376662306a36Sopenharmony_ci [DRBD_FAULT_DT_WR] = "Data write", 376762306a36Sopenharmony_ci [DRBD_FAULT_DT_RD] = "Data read", 376862306a36Sopenharmony_ci [DRBD_FAULT_DT_RA] = "Data read ahead", 376962306a36Sopenharmony_ci [DRBD_FAULT_BM_ALLOC] = "BM allocation", 377062306a36Sopenharmony_ci [DRBD_FAULT_AL_EE] = "EE allocation", 377162306a36Sopenharmony_ci [DRBD_FAULT_RECEIVE] = "receive data corruption", 377262306a36Sopenharmony_ci }; 377362306a36Sopenharmony_ci 377462306a36Sopenharmony_ci return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; 377562306a36Sopenharmony_ci} 377662306a36Sopenharmony_ci 377762306a36Sopenharmony_ciunsigned int 377862306a36Sopenharmony_ci_drbd_insert_fault(struct drbd_device *device, unsigned int type) 377962306a36Sopenharmony_ci{ 378062306a36Sopenharmony_ci static struct fault_random_state rrs = {0, 0}; 378162306a36Sopenharmony_ci 378262306a36Sopenharmony_ci unsigned int ret = ( 378362306a36Sopenharmony_ci (drbd_fault_devs == 0 || 378462306a36Sopenharmony_ci ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) && 378562306a36Sopenharmony_ci (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate)); 378662306a36Sopenharmony_ci 378762306a36Sopenharmony_ci if (ret) { 378862306a36Sopenharmony_ci drbd_fault_count++; 378962306a36Sopenharmony_ci 379062306a36Sopenharmony_ci if (drbd_ratelimit()) 379162306a36Sopenharmony_ci drbd_warn(device, "***Simulating %s failure\n", 379262306a36Sopenharmony_ci _drbd_fault_str(type)); 379362306a36Sopenharmony_ci } 379462306a36Sopenharmony_ci 379562306a36Sopenharmony_ci return ret; 379662306a36Sopenharmony_ci} 379762306a36Sopenharmony_ci#endif 379862306a36Sopenharmony_ci 379962306a36Sopenharmony_cimodule_init(drbd_init) 380062306a36Sopenharmony_cimodule_exit(drbd_cleanup) 380162306a36Sopenharmony_ci 380262306a36Sopenharmony_ciEXPORT_SYMBOL(drbd_conn_str); 380362306a36Sopenharmony_ciEXPORT_SYMBOL(drbd_role_str); 380462306a36Sopenharmony_ciEXPORT_SYMBOL(drbd_disk_str); 380562306a36Sopenharmony_ciEXPORT_SYMBOL(drbd_set_st_err_str); 3806