162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci drbd_receiver.c 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 862306a36Sopenharmony_ci Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 962306a36Sopenharmony_ci Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/module.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/uaccess.h> 1762306a36Sopenharmony_ci#include <net/sock.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include <linux/drbd.h> 2062306a36Sopenharmony_ci#include <linux/fs.h> 2162306a36Sopenharmony_ci#include <linux/file.h> 2262306a36Sopenharmony_ci#include <linux/in.h> 2362306a36Sopenharmony_ci#include <linux/mm.h> 2462306a36Sopenharmony_ci#include <linux/memcontrol.h> 2562306a36Sopenharmony_ci#include <linux/mm_inline.h> 2662306a36Sopenharmony_ci#include <linux/slab.h> 2762306a36Sopenharmony_ci#include <uapi/linux/sched/types.h> 2862306a36Sopenharmony_ci#include <linux/sched/signal.h> 2962306a36Sopenharmony_ci#include <linux/pkt_sched.h> 3062306a36Sopenharmony_ci#include <linux/unistd.h> 3162306a36Sopenharmony_ci#include <linux/vmalloc.h> 3262306a36Sopenharmony_ci#include <linux/random.h> 3362306a36Sopenharmony_ci#include <linux/string.h> 3462306a36Sopenharmony_ci#include <linux/scatterlist.h> 3562306a36Sopenharmony_ci#include <linux/part_stat.h> 3662306a36Sopenharmony_ci#include "drbd_int.h" 3762306a36Sopenharmony_ci#include "drbd_protocol.h" 3862306a36Sopenharmony_ci#include "drbd_req.h" 3962306a36Sopenharmony_ci#include "drbd_vli.h" 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#define 
PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES) 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_cistruct packet_info { 4462306a36Sopenharmony_ci enum drbd_packet cmd; 4562306a36Sopenharmony_ci unsigned int size; 4662306a36Sopenharmony_ci unsigned int vnr; 4762306a36Sopenharmony_ci void *data; 4862306a36Sopenharmony_ci}; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cienum finish_epoch { 5162306a36Sopenharmony_ci FE_STILL_LIVE, 5262306a36Sopenharmony_ci FE_DESTROYED, 5362306a36Sopenharmony_ci FE_RECYCLED, 5462306a36Sopenharmony_ci}; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistatic int drbd_do_features(struct drbd_connection *connection); 5762306a36Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection); 5862306a36Sopenharmony_cistatic int drbd_disconnected(struct drbd_peer_device *); 5962306a36Sopenharmony_cistatic void conn_wait_active_ee_empty(struct drbd_connection *connection); 6062306a36Sopenharmony_cistatic enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); 6162306a36Sopenharmony_cistatic int e_end_block(struct drbd_work *, int); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci/* 6762306a36Sopenharmony_ci * some helper functions to deal with single linked page lists, 6862306a36Sopenharmony_ci * page->private being our "next" pointer. 6962306a36Sopenharmony_ci */ 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/* If at least n pages are linked at head, get n pages off. 7262306a36Sopenharmony_ci * Otherwise, don't modify head, and return NULL. 7362306a36Sopenharmony_ci * Locking is the responsibility of the caller. 
7462306a36Sopenharmony_ci */ 7562306a36Sopenharmony_cistatic struct page *page_chain_del(struct page **head, int n) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci struct page *page; 7862306a36Sopenharmony_ci struct page *tmp; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci BUG_ON(!n); 8162306a36Sopenharmony_ci BUG_ON(!head); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci page = *head; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci if (!page) 8662306a36Sopenharmony_ci return NULL; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci while (page) { 8962306a36Sopenharmony_ci tmp = page_chain_next(page); 9062306a36Sopenharmony_ci if (--n == 0) 9162306a36Sopenharmony_ci break; /* found sufficient pages */ 9262306a36Sopenharmony_ci if (tmp == NULL) 9362306a36Sopenharmony_ci /* insufficient pages, don't use any of them. */ 9462306a36Sopenharmony_ci return NULL; 9562306a36Sopenharmony_ci page = tmp; 9662306a36Sopenharmony_ci } 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci /* add end of list marker for the returned list */ 9962306a36Sopenharmony_ci set_page_private(page, 0); 10062306a36Sopenharmony_ci /* actual return value, and adjustment of head */ 10162306a36Sopenharmony_ci page = *head; 10262306a36Sopenharmony_ci *head = tmp; 10362306a36Sopenharmony_ci return page; 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci/* may be used outside of locks to find the tail of a (usually short) 10762306a36Sopenharmony_ci * "private" page chain, before adding it back to a global chain head 10862306a36Sopenharmony_ci * with page_chain_add() under a spinlock. 
*/ 10962306a36Sopenharmony_cistatic struct page *page_chain_tail(struct page *page, int *len) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci struct page *tmp; 11262306a36Sopenharmony_ci int i = 1; 11362306a36Sopenharmony_ci while ((tmp = page_chain_next(page))) { 11462306a36Sopenharmony_ci ++i; 11562306a36Sopenharmony_ci page = tmp; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci if (len) 11862306a36Sopenharmony_ci *len = i; 11962306a36Sopenharmony_ci return page; 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_cistatic int page_chain_free(struct page *page) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci struct page *tmp; 12562306a36Sopenharmony_ci int i = 0; 12662306a36Sopenharmony_ci page_chain_for_each_safe(page, tmp) { 12762306a36Sopenharmony_ci put_page(page); 12862306a36Sopenharmony_ci ++i; 12962306a36Sopenharmony_ci } 13062306a36Sopenharmony_ci return i; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic void page_chain_add(struct page **head, 13462306a36Sopenharmony_ci struct page *chain_first, struct page *chain_last) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci#if 1 13762306a36Sopenharmony_ci struct page *tmp; 13862306a36Sopenharmony_ci tmp = page_chain_tail(chain_first, NULL); 13962306a36Sopenharmony_ci BUG_ON(tmp != chain_last); 14062306a36Sopenharmony_ci#endif 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci /* add chain to head */ 14362306a36Sopenharmony_ci set_page_private(chain_last, (unsigned long)*head); 14462306a36Sopenharmony_ci *head = chain_first; 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic struct page *__drbd_alloc_pages(struct drbd_device *device, 14862306a36Sopenharmony_ci unsigned int number) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci struct page *page = NULL; 15162306a36Sopenharmony_ci struct page *tmp = NULL; 15262306a36Sopenharmony_ci unsigned int i = 0; 
15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci /* Yes, testing drbd_pp_vacant outside the lock is racy. 15562306a36Sopenharmony_ci * So what. It saves a spin_lock. */ 15662306a36Sopenharmony_ci if (drbd_pp_vacant >= number) { 15762306a36Sopenharmony_ci spin_lock(&drbd_pp_lock); 15862306a36Sopenharmony_ci page = page_chain_del(&drbd_pp_pool, number); 15962306a36Sopenharmony_ci if (page) 16062306a36Sopenharmony_ci drbd_pp_vacant -= number; 16162306a36Sopenharmony_ci spin_unlock(&drbd_pp_lock); 16262306a36Sopenharmony_ci if (page) 16362306a36Sopenharmony_ci return page; 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD 16762306a36Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 16862306a36Sopenharmony_ci * which in turn might block on the other node at this very place. */ 16962306a36Sopenharmony_ci for (i = 0; i < number; i++) { 17062306a36Sopenharmony_ci tmp = alloc_page(GFP_TRY); 17162306a36Sopenharmony_ci if (!tmp) 17262306a36Sopenharmony_ci break; 17362306a36Sopenharmony_ci set_page_private(tmp, (unsigned long)page); 17462306a36Sopenharmony_ci page = tmp; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci if (i == number) 17862306a36Sopenharmony_ci return page; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci /* Not enough pages immediately available this time. 18162306a36Sopenharmony_ci * No need to jump around here, drbd_alloc_pages will retry this 18262306a36Sopenharmony_ci * function "soon". 
*/ 18362306a36Sopenharmony_ci if (page) { 18462306a36Sopenharmony_ci tmp = page_chain_tail(page, NULL); 18562306a36Sopenharmony_ci spin_lock(&drbd_pp_lock); 18662306a36Sopenharmony_ci page_chain_add(&drbd_pp_pool, page, tmp); 18762306a36Sopenharmony_ci drbd_pp_vacant += i; 18862306a36Sopenharmony_ci spin_unlock(&drbd_pp_lock); 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci return NULL; 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic void reclaim_finished_net_peer_reqs(struct drbd_device *device, 19462306a36Sopenharmony_ci struct list_head *to_be_freed) 19562306a36Sopenharmony_ci{ 19662306a36Sopenharmony_ci struct drbd_peer_request *peer_req, *tmp; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci /* The EEs are always appended to the end of the list. Since 19962306a36Sopenharmony_ci they are sent in order over the wire, they have to finish 20062306a36Sopenharmony_ci in order. As soon as we see the first not finished we can 20162306a36Sopenharmony_ci stop to examine the list... 
*/ 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { 20462306a36Sopenharmony_ci if (drbd_peer_req_has_active_page(peer_req)) 20562306a36Sopenharmony_ci break; 20662306a36Sopenharmony_ci list_move(&peer_req->w.list, to_be_freed); 20762306a36Sopenharmony_ci } 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic void drbd_reclaim_net_peer_reqs(struct drbd_device *device) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci LIST_HEAD(reclaimed); 21362306a36Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 21662306a36Sopenharmony_ci reclaim_finished_net_peer_reqs(device, &reclaimed); 21762306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 21862306a36Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 21962306a36Sopenharmony_ci drbd_free_net_peer_req(device, peer_req); 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_cistatic void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 22562306a36Sopenharmony_ci int vnr; 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci rcu_read_lock(); 22862306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 22962306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 23062306a36Sopenharmony_ci if (!atomic_read(&device->pp_in_use_by_net)) 23162306a36Sopenharmony_ci continue; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci kref_get(&device->kref); 23462306a36Sopenharmony_ci rcu_read_unlock(); 23562306a36Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 23662306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 23762306a36Sopenharmony_ci rcu_read_lock(); 23862306a36Sopenharmony_ci } 
23962306a36Sopenharmony_ci rcu_read_unlock(); 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci/** 24362306a36Sopenharmony_ci * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) 24462306a36Sopenharmony_ci * @peer_device: DRBD device. 24562306a36Sopenharmony_ci * @number: number of pages requested 24662306a36Sopenharmony_ci * @retry: whether to retry, if not enough pages are available right now 24762306a36Sopenharmony_ci * 24862306a36Sopenharmony_ci * Tries to allocate number pages, first from our own page pool, then from 24962306a36Sopenharmony_ci * the kernel. 25062306a36Sopenharmony_ci * Possibly retry until DRBD frees sufficient pages somewhere else. 25162306a36Sopenharmony_ci * 25262306a36Sopenharmony_ci * If this allocation would exceed the max_buffers setting, we throttle 25362306a36Sopenharmony_ci * allocation (schedule_timeout) to give the system some room to breathe. 25462306a36Sopenharmony_ci * 25562306a36Sopenharmony_ci * We do not use max-buffers as hard limit, because it could lead to 25662306a36Sopenharmony_ci * congestion and further to a distributed deadlock during online-verify or 25762306a36Sopenharmony_ci * (checksum based) resync, if the max-buffers, socket buffer sizes and 25862306a36Sopenharmony_ci * resync-rate settings are mis-configured. 25962306a36Sopenharmony_ci * 26062306a36Sopenharmony_ci * Returns a page chain linked via page->private. 
26162306a36Sopenharmony_ci */ 26262306a36Sopenharmony_cistruct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, 26362306a36Sopenharmony_ci bool retry) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 26662306a36Sopenharmony_ci struct page *page = NULL; 26762306a36Sopenharmony_ci struct net_conf *nc; 26862306a36Sopenharmony_ci DEFINE_WAIT(wait); 26962306a36Sopenharmony_ci unsigned int mxb; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci rcu_read_lock(); 27262306a36Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 27362306a36Sopenharmony_ci mxb = nc ? nc->max_buffers : 1000000; 27462306a36Sopenharmony_ci rcu_read_unlock(); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci if (atomic_read(&device->pp_in_use) < mxb) 27762306a36Sopenharmony_ci page = __drbd_alloc_pages(device, number); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci /* Try to keep the fast path fast, but occasionally we need 28062306a36Sopenharmony_ci * to reclaim the pages we lended to the network stack. 
*/ 28162306a36Sopenharmony_ci if (page && atomic_read(&device->pp_in_use_by_net) > 512) 28262306a36Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci while (page == NULL) { 28562306a36Sopenharmony_ci prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (atomic_read(&device->pp_in_use) < mxb) { 29062306a36Sopenharmony_ci page = __drbd_alloc_pages(device, number); 29162306a36Sopenharmony_ci if (page) 29262306a36Sopenharmony_ci break; 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci if (!retry) 29662306a36Sopenharmony_ci break; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci if (signal_pending(current)) { 29962306a36Sopenharmony_ci drbd_warn(device, "drbd_alloc_pages interrupted!\n"); 30062306a36Sopenharmony_ci break; 30162306a36Sopenharmony_ci } 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci if (schedule_timeout(HZ/10) == 0) 30462306a36Sopenharmony_ci mxb = UINT_MAX; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci finish_wait(&drbd_pp_wait, &wait); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (page) 30962306a36Sopenharmony_ci atomic_add(number, &device->pp_in_use); 31062306a36Sopenharmony_ci return page; 31162306a36Sopenharmony_ci} 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. 31462306a36Sopenharmony_ci * Is also used from inside an other spin_lock_irq(&resource->req_lock); 31562306a36Sopenharmony_ci * Either links the page chain back to the global pool, 31662306a36Sopenharmony_ci * or returns all pages to the system. */ 31762306a36Sopenharmony_cistatic void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci atomic_t *a = is_net ? 
&device->pp_in_use_by_net : &device->pp_in_use; 32062306a36Sopenharmony_ci int i; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci if (page == NULL) 32362306a36Sopenharmony_ci return; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) 32662306a36Sopenharmony_ci i = page_chain_free(page); 32762306a36Sopenharmony_ci else { 32862306a36Sopenharmony_ci struct page *tmp; 32962306a36Sopenharmony_ci tmp = page_chain_tail(page, &i); 33062306a36Sopenharmony_ci spin_lock(&drbd_pp_lock); 33162306a36Sopenharmony_ci page_chain_add(&drbd_pp_pool, page, tmp); 33262306a36Sopenharmony_ci drbd_pp_vacant += i; 33362306a36Sopenharmony_ci spin_unlock(&drbd_pp_lock); 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci i = atomic_sub_return(i, a); 33662306a36Sopenharmony_ci if (i < 0) 33762306a36Sopenharmony_ci drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", 33862306a36Sopenharmony_ci is_net ? "pp_in_use_by_net" : "pp_in_use", i); 33962306a36Sopenharmony_ci wake_up(&drbd_pp_wait); 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci/* 34362306a36Sopenharmony_ciYou need to hold the req_lock: 34462306a36Sopenharmony_ci _drbd_wait_ee_list_empty() 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ciYou must not have the req_lock: 34762306a36Sopenharmony_ci drbd_free_peer_req() 34862306a36Sopenharmony_ci drbd_alloc_peer_req() 34962306a36Sopenharmony_ci drbd_free_peer_reqs() 35062306a36Sopenharmony_ci drbd_ee_fix_bhs() 35162306a36Sopenharmony_ci drbd_finish_peer_reqs() 35262306a36Sopenharmony_ci drbd_clear_done_ee() 35362306a36Sopenharmony_ci drbd_wait_ee_list_empty() 35462306a36Sopenharmony_ci*/ 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci/* normal: payload_size == request size (bi_size) 35762306a36Sopenharmony_ci * w_same: payload_size == logical_block_size 35862306a36Sopenharmony_ci * trim: payload_size == 0 */ 35962306a36Sopenharmony_cistruct drbd_peer_request * 
36062306a36Sopenharmony_cidrbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 36162306a36Sopenharmony_ci unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 36462306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 36562306a36Sopenharmony_ci struct page *page = NULL; 36662306a36Sopenharmony_ci unsigned int nr_pages = PFN_UP(payload_size); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) 36962306a36Sopenharmony_ci return NULL; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 37262306a36Sopenharmony_ci if (!peer_req) { 37362306a36Sopenharmony_ci if (!(gfp_mask & __GFP_NOWARN)) 37462306a36Sopenharmony_ci drbd_err(device, "%s: allocation failed\n", __func__); 37562306a36Sopenharmony_ci return NULL; 37662306a36Sopenharmony_ci } 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci if (nr_pages) { 37962306a36Sopenharmony_ci page = drbd_alloc_pages(peer_device, nr_pages, 38062306a36Sopenharmony_ci gfpflags_allow_blocking(gfp_mask)); 38162306a36Sopenharmony_ci if (!page) 38262306a36Sopenharmony_ci goto fail; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci memset(peer_req, 0, sizeof(*peer_req)); 38662306a36Sopenharmony_ci INIT_LIST_HEAD(&peer_req->w.list); 38762306a36Sopenharmony_ci drbd_clear_interval(&peer_req->i); 38862306a36Sopenharmony_ci peer_req->i.size = request_size; 38962306a36Sopenharmony_ci peer_req->i.sector = sector; 39062306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 39162306a36Sopenharmony_ci peer_req->peer_device = peer_device; 39262306a36Sopenharmony_ci peer_req->pages = page; 39362306a36Sopenharmony_ci /* 39462306a36Sopenharmony_ci * The block_id is opaque to the receiver. 
It is not endianness 39562306a36Sopenharmony_ci * converted, and sent back to the sender unchanged. 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci peer_req->block_id = id; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci return peer_req; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci fail: 40262306a36Sopenharmony_ci mempool_free(peer_req, &drbd_ee_mempool); 40362306a36Sopenharmony_ci return NULL; 40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_civoid __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 40762306a36Sopenharmony_ci int is_net) 40862306a36Sopenharmony_ci{ 40962306a36Sopenharmony_ci might_sleep(); 41062306a36Sopenharmony_ci if (peer_req->flags & EE_HAS_DIGEST) 41162306a36Sopenharmony_ci kfree(peer_req->digest); 41262306a36Sopenharmony_ci drbd_free_pages(device, peer_req->pages, is_net); 41362306a36Sopenharmony_ci D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 41462306a36Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 41562306a36Sopenharmony_ci if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { 41662306a36Sopenharmony_ci peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 41762306a36Sopenharmony_ci drbd_al_complete_io(device, &peer_req->i); 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci mempool_free(peer_req, &drbd_ee_mempool); 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ciint drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci LIST_HEAD(work_list); 42562306a36Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 42662306a36Sopenharmony_ci int count = 0; 42762306a36Sopenharmony_ci int is_net = list == &device->net_ee; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 43062306a36Sopenharmony_ci list_splice_init(list, &work_list); 43162306a36Sopenharmony_ci 
spin_unlock_irq(&device->resource->req_lock); 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 43462306a36Sopenharmony_ci __drbd_free_peer_req(device, peer_req, is_net); 43562306a36Sopenharmony_ci count++; 43662306a36Sopenharmony_ci } 43762306a36Sopenharmony_ci return count; 43862306a36Sopenharmony_ci} 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci/* 44162306a36Sopenharmony_ci * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_cistatic int drbd_finish_peer_reqs(struct drbd_device *device) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci LIST_HEAD(work_list); 44662306a36Sopenharmony_ci LIST_HEAD(reclaimed); 44762306a36Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 44862306a36Sopenharmony_ci int err = 0; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 45162306a36Sopenharmony_ci reclaim_finished_net_peer_reqs(device, &reclaimed); 45262306a36Sopenharmony_ci list_splice_init(&device->done_ee, &work_list); 45362306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 45662306a36Sopenharmony_ci drbd_free_net_peer_req(device, peer_req); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci /* possible callbacks here: 45962306a36Sopenharmony_ci * e_end_block, and e_end_resync_block, e_send_superseded. 46062306a36Sopenharmony_ci * all ignore the last argument. 
46162306a36Sopenharmony_ci */ 46262306a36Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 46362306a36Sopenharmony_ci int err2; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci /* list_del not necessary, next/prev members not touched */ 46662306a36Sopenharmony_ci err2 = peer_req->w.cb(&peer_req->w, !!err); 46762306a36Sopenharmony_ci if (!err) 46862306a36Sopenharmony_ci err = err2; 46962306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 47062306a36Sopenharmony_ci } 47162306a36Sopenharmony_ci wake_up(&device->ee_wait); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci return err; 47462306a36Sopenharmony_ci} 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_cistatic void _drbd_wait_ee_list_empty(struct drbd_device *device, 47762306a36Sopenharmony_ci struct list_head *head) 47862306a36Sopenharmony_ci{ 47962306a36Sopenharmony_ci DEFINE_WAIT(wait); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci /* avoids spin_lock/unlock 48262306a36Sopenharmony_ci * and calling prepare_to_wait in the fast path */ 48362306a36Sopenharmony_ci while (!list_empty(head)) { 48462306a36Sopenharmony_ci prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 48562306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 48662306a36Sopenharmony_ci io_schedule(); 48762306a36Sopenharmony_ci finish_wait(&device->ee_wait, &wait); 48862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 48962306a36Sopenharmony_ci } 49062306a36Sopenharmony_ci} 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_cistatic void drbd_wait_ee_list_empty(struct drbd_device *device, 49362306a36Sopenharmony_ci struct list_head *head) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 49662306a36Sopenharmony_ci _drbd_wait_ee_list_empty(device, head); 49762306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ci 
50062306a36Sopenharmony_cistatic int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 50162306a36Sopenharmony_ci{ 50262306a36Sopenharmony_ci struct kvec iov = { 50362306a36Sopenharmony_ci .iov_base = buf, 50462306a36Sopenharmony_ci .iov_len = size, 50562306a36Sopenharmony_ci }; 50662306a36Sopenharmony_ci struct msghdr msg = { 50762306a36Sopenharmony_ci .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 50862306a36Sopenharmony_ci }; 50962306a36Sopenharmony_ci iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); 51062306a36Sopenharmony_ci return sock_recvmsg(sock, &msg, msg.msg_flags); 51162306a36Sopenharmony_ci} 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_cistatic int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci int rv; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci rv = drbd_recv_short(connection->data.socket, buf, size, 0); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (rv < 0) { 52062306a36Sopenharmony_ci if (rv == -ECONNRESET) 52162306a36Sopenharmony_ci drbd_info(connection, "sock was reset by peer\n"); 52262306a36Sopenharmony_ci else if (rv != -ERESTARTSYS) 52362306a36Sopenharmony_ci drbd_err(connection, "sock_recvmsg returned %d\n", rv); 52462306a36Sopenharmony_ci } else if (rv == 0) { 52562306a36Sopenharmony_ci if (test_bit(DISCONNECT_SENT, &connection->flags)) { 52662306a36Sopenharmony_ci long t; 52762306a36Sopenharmony_ci rcu_read_lock(); 52862306a36Sopenharmony_ci t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 52962306a36Sopenharmony_ci rcu_read_unlock(); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci if (t) 53462306a36Sopenharmony_ci goto out; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci drbd_info(connection, "sock was shut down by peer\n"); 
53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci if (rv != size) 54062306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ciout: 54362306a36Sopenharmony_ci return rv; 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_cistatic int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci int err; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci err = drbd_recv(connection, buf, size); 55162306a36Sopenharmony_ci if (err != size) { 55262306a36Sopenharmony_ci if (err >= 0) 55362306a36Sopenharmony_ci err = -EIO; 55462306a36Sopenharmony_ci } else 55562306a36Sopenharmony_ci err = 0; 55662306a36Sopenharmony_ci return err; 55762306a36Sopenharmony_ci} 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_cistatic int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) 56062306a36Sopenharmony_ci{ 56162306a36Sopenharmony_ci int err; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci err = drbd_recv_all(connection, buf, size); 56462306a36Sopenharmony_ci if (err && !signal_pending(current)) 56562306a36Sopenharmony_ci drbd_warn(connection, "short read (expected size %d)\n", (int)size); 56662306a36Sopenharmony_ci return err; 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci/* quoting tcp(7): 57062306a36Sopenharmony_ci * On individual connections, the socket buffer size must be set prior to the 57162306a36Sopenharmony_ci * listen(2) or connect(2) calls in order to have it take effect. 57262306a36Sopenharmony_ci * This is our wrapper to do so. 
57362306a36Sopenharmony_ci */ 57462306a36Sopenharmony_cistatic void drbd_setbufsize(struct socket *sock, unsigned int snd, 57562306a36Sopenharmony_ci unsigned int rcv) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci /* open coded SO_SNDBUF, SO_RCVBUF */ 57862306a36Sopenharmony_ci if (snd) { 57962306a36Sopenharmony_ci sock->sk->sk_sndbuf = snd; 58062306a36Sopenharmony_ci sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 58162306a36Sopenharmony_ci } 58262306a36Sopenharmony_ci if (rcv) { 58362306a36Sopenharmony_ci sock->sk->sk_rcvbuf = rcv; 58462306a36Sopenharmony_ci sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic struct socket *drbd_try_connect(struct drbd_connection *connection) 58962306a36Sopenharmony_ci{ 59062306a36Sopenharmony_ci const char *what; 59162306a36Sopenharmony_ci struct socket *sock; 59262306a36Sopenharmony_ci struct sockaddr_in6 src_in6; 59362306a36Sopenharmony_ci struct sockaddr_in6 peer_in6; 59462306a36Sopenharmony_ci struct net_conf *nc; 59562306a36Sopenharmony_ci int err, peer_addr_len, my_addr_len; 59662306a36Sopenharmony_ci int sndbuf_size, rcvbuf_size, connect_int; 59762306a36Sopenharmony_ci int disconnect_on_error = 1; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci rcu_read_lock(); 60062306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 60162306a36Sopenharmony_ci if (!nc) { 60262306a36Sopenharmony_ci rcu_read_unlock(); 60362306a36Sopenharmony_ci return NULL; 60462306a36Sopenharmony_ci } 60562306a36Sopenharmony_ci sndbuf_size = nc->sndbuf_size; 60662306a36Sopenharmony_ci rcvbuf_size = nc->rcvbuf_size; 60762306a36Sopenharmony_ci connect_int = nc->connect_int; 60862306a36Sopenharmony_ci rcu_read_unlock(); 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6)); 61162306a36Sopenharmony_ci memcpy(&src_in6, &connection->my_addr, my_addr_len); 
61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) 61462306a36Sopenharmony_ci src_in6.sin6_port = 0; 61562306a36Sopenharmony_ci else 61662306a36Sopenharmony_ci ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); 61962306a36Sopenharmony_ci memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci what = "sock_create_kern"; 62262306a36Sopenharmony_ci err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, 62362306a36Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &sock); 62462306a36Sopenharmony_ci if (err < 0) { 62562306a36Sopenharmony_ci sock = NULL; 62662306a36Sopenharmony_ci goto out; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci sock->sk->sk_rcvtimeo = 63062306a36Sopenharmony_ci sock->sk->sk_sndtimeo = connect_int * HZ; 63162306a36Sopenharmony_ci drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci /* explicitly bind to the configured IP as source IP 63462306a36Sopenharmony_ci * for the outgoing connections. 63562306a36Sopenharmony_ci * This is needed for multihomed hosts and to be 63662306a36Sopenharmony_ci * able to use lo: interfaces for drbd. 63762306a36Sopenharmony_ci * Make sure to use 0 as port number, so linux selects 63862306a36Sopenharmony_ci * a free one dynamically. 63962306a36Sopenharmony_ci */ 64062306a36Sopenharmony_ci what = "bind before connect"; 64162306a36Sopenharmony_ci err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); 64262306a36Sopenharmony_ci if (err < 0) 64362306a36Sopenharmony_ci goto out; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci /* connect may fail, peer not yet available. 
64662306a36Sopenharmony_ci * stay C_WF_CONNECTION, don't go Disconnecting! */ 64762306a36Sopenharmony_ci disconnect_on_error = 0; 64862306a36Sopenharmony_ci what = "connect"; 64962306a36Sopenharmony_ci err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ciout: 65262306a36Sopenharmony_ci if (err < 0) { 65362306a36Sopenharmony_ci if (sock) { 65462306a36Sopenharmony_ci sock_release(sock); 65562306a36Sopenharmony_ci sock = NULL; 65662306a36Sopenharmony_ci } 65762306a36Sopenharmony_ci switch (-err) { 65862306a36Sopenharmony_ci /* timeout, busy, signal pending */ 65962306a36Sopenharmony_ci case ETIMEDOUT: case EAGAIN: case EINPROGRESS: 66062306a36Sopenharmony_ci case EINTR: case ERESTARTSYS: 66162306a36Sopenharmony_ci /* peer not (yet) available, network problem */ 66262306a36Sopenharmony_ci case ECONNREFUSED: case ENETUNREACH: 66362306a36Sopenharmony_ci case EHOSTDOWN: case EHOSTUNREACH: 66462306a36Sopenharmony_ci disconnect_on_error = 0; 66562306a36Sopenharmony_ci break; 66662306a36Sopenharmony_ci default: 66762306a36Sopenharmony_ci drbd_err(connection, "%s failed, err = %d\n", what, err); 66862306a36Sopenharmony_ci } 66962306a36Sopenharmony_ci if (disconnect_on_error) 67062306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci return sock; 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_cistruct accept_wait_data { 67762306a36Sopenharmony_ci struct drbd_connection *connection; 67862306a36Sopenharmony_ci struct socket *s_listen; 67962306a36Sopenharmony_ci struct completion door_bell; 68062306a36Sopenharmony_ci void (*original_sk_state_change)(struct sock *sk); 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci}; 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic void drbd_incoming_connection(struct sock *sk) 
68562306a36Sopenharmony_ci{ 68662306a36Sopenharmony_ci struct accept_wait_data *ad = sk->sk_user_data; 68762306a36Sopenharmony_ci void (*state_change)(struct sock *sk); 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci state_change = ad->original_sk_state_change; 69062306a36Sopenharmony_ci if (sk->sk_state == TCP_ESTABLISHED) 69162306a36Sopenharmony_ci complete(&ad->door_bell); 69262306a36Sopenharmony_ci state_change(sk); 69362306a36Sopenharmony_ci} 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_cistatic int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) 69662306a36Sopenharmony_ci{ 69762306a36Sopenharmony_ci int err, sndbuf_size, rcvbuf_size, my_addr_len; 69862306a36Sopenharmony_ci struct sockaddr_in6 my_addr; 69962306a36Sopenharmony_ci struct socket *s_listen; 70062306a36Sopenharmony_ci struct net_conf *nc; 70162306a36Sopenharmony_ci const char *what; 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci rcu_read_lock(); 70462306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 70562306a36Sopenharmony_ci if (!nc) { 70662306a36Sopenharmony_ci rcu_read_unlock(); 70762306a36Sopenharmony_ci return -EIO; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci sndbuf_size = nc->sndbuf_size; 71062306a36Sopenharmony_ci rcvbuf_size = nc->rcvbuf_size; 71162306a36Sopenharmony_ci rcu_read_unlock(); 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); 71462306a36Sopenharmony_ci memcpy(&my_addr, &connection->my_addr, my_addr_len); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci what = "sock_create_kern"; 71762306a36Sopenharmony_ci err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, 71862306a36Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &s_listen); 71962306a36Sopenharmony_ci if (err) { 72062306a36Sopenharmony_ci s_listen = NULL; 72162306a36Sopenharmony_ci goto out; 72262306a36Sopenharmony_ci } 
72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 72562306a36Sopenharmony_ci drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci what = "bind before listen"; 72862306a36Sopenharmony_ci err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); 72962306a36Sopenharmony_ci if (err < 0) 73062306a36Sopenharmony_ci goto out; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci ad->s_listen = s_listen; 73362306a36Sopenharmony_ci write_lock_bh(&s_listen->sk->sk_callback_lock); 73462306a36Sopenharmony_ci ad->original_sk_state_change = s_listen->sk->sk_state_change; 73562306a36Sopenharmony_ci s_listen->sk->sk_state_change = drbd_incoming_connection; 73662306a36Sopenharmony_ci s_listen->sk->sk_user_data = ad; 73762306a36Sopenharmony_ci write_unlock_bh(&s_listen->sk->sk_callback_lock); 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci what = "listen"; 74062306a36Sopenharmony_ci err = s_listen->ops->listen(s_listen, 5); 74162306a36Sopenharmony_ci if (err < 0) 74262306a36Sopenharmony_ci goto out; 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci return 0; 74562306a36Sopenharmony_ciout: 74662306a36Sopenharmony_ci if (s_listen) 74762306a36Sopenharmony_ci sock_release(s_listen); 74862306a36Sopenharmony_ci if (err < 0) { 74962306a36Sopenharmony_ci if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 75062306a36Sopenharmony_ci drbd_err(connection, "%s failed, err = %d\n", what, err); 75162306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci } 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci return -EIO; 75662306a36Sopenharmony_ci} 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_cistatic void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) 75962306a36Sopenharmony_ci{ 76062306a36Sopenharmony_ci 
write_lock_bh(&sk->sk_callback_lock); 76162306a36Sopenharmony_ci sk->sk_state_change = ad->original_sk_state_change; 76262306a36Sopenharmony_ci sk->sk_user_data = NULL; 76362306a36Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 76462306a36Sopenharmony_ci} 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_cistatic struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) 76762306a36Sopenharmony_ci{ 76862306a36Sopenharmony_ci int timeo, connect_int, err = 0; 76962306a36Sopenharmony_ci struct socket *s_estab = NULL; 77062306a36Sopenharmony_ci struct net_conf *nc; 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci rcu_read_lock(); 77362306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 77462306a36Sopenharmony_ci if (!nc) { 77562306a36Sopenharmony_ci rcu_read_unlock(); 77662306a36Sopenharmony_ci return NULL; 77762306a36Sopenharmony_ci } 77862306a36Sopenharmony_ci connect_int = nc->connect_int; 77962306a36Sopenharmony_ci rcu_read_unlock(); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci timeo = connect_int * HZ; 78262306a36Sopenharmony_ci /* 28.5% random jitter */ 78362306a36Sopenharmony_ci timeo += get_random_u32_below(2) ? 
timeo / 7 : -timeo / 7; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); 78662306a36Sopenharmony_ci if (err <= 0) 78762306a36Sopenharmony_ci return NULL; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci err = kernel_accept(ad->s_listen, &s_estab, 0); 79062306a36Sopenharmony_ci if (err < 0) { 79162306a36Sopenharmony_ci if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 79262306a36Sopenharmony_ci drbd_err(connection, "accept failed, err = %d\n", err); 79362306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 79462306a36Sopenharmony_ci } 79562306a36Sopenharmony_ci } 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci if (s_estab) 79862306a36Sopenharmony_ci unregister_state_change(s_estab->sk, ad); 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci return s_estab; 80162306a36Sopenharmony_ci} 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_cistatic int decode_header(struct drbd_connection *, void *, struct packet_info *); 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_cistatic int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, 80662306a36Sopenharmony_ci enum drbd_packet cmd) 80762306a36Sopenharmony_ci{ 80862306a36Sopenharmony_ci if (!conn_prepare_command(connection, sock)) 80962306a36Sopenharmony_ci return -EIO; 81062306a36Sopenharmony_ci return conn_send_command(connection, sock, cmd, 0, NULL, 0); 81162306a36Sopenharmony_ci} 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_cistatic int receive_first_packet(struct drbd_connection *connection, struct socket *sock) 81462306a36Sopenharmony_ci{ 81562306a36Sopenharmony_ci unsigned int header_size = drbd_header_size(connection); 81662306a36Sopenharmony_ci struct packet_info pi; 81762306a36Sopenharmony_ci struct net_conf *nc; 81862306a36Sopenharmony_ci int err; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci rcu_read_lock(); 
82162306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 82262306a36Sopenharmony_ci if (!nc) { 82362306a36Sopenharmony_ci rcu_read_unlock(); 82462306a36Sopenharmony_ci return -EIO; 82562306a36Sopenharmony_ci } 82662306a36Sopenharmony_ci sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; 82762306a36Sopenharmony_ci rcu_read_unlock(); 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 83062306a36Sopenharmony_ci if (err != header_size) { 83162306a36Sopenharmony_ci if (err >= 0) 83262306a36Sopenharmony_ci err = -EIO; 83362306a36Sopenharmony_ci return err; 83462306a36Sopenharmony_ci } 83562306a36Sopenharmony_ci err = decode_header(connection, connection->data.rbuf, &pi); 83662306a36Sopenharmony_ci if (err) 83762306a36Sopenharmony_ci return err; 83862306a36Sopenharmony_ci return pi.cmd; 83962306a36Sopenharmony_ci} 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci/** 84262306a36Sopenharmony_ci * drbd_socket_okay() - Free the socket if its connection is not okay 84362306a36Sopenharmony_ci * @sock: pointer to the pointer to the socket. 
84462306a36Sopenharmony_ci */ 84562306a36Sopenharmony_cistatic bool drbd_socket_okay(struct socket **sock) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci int rr; 84862306a36Sopenharmony_ci char tb[4]; 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci if (!*sock) 85162306a36Sopenharmony_ci return false; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci if (rr > 0 || rr == -EAGAIN) { 85662306a36Sopenharmony_ci return true; 85762306a36Sopenharmony_ci } else { 85862306a36Sopenharmony_ci sock_release(*sock); 85962306a36Sopenharmony_ci *sock = NULL; 86062306a36Sopenharmony_ci return false; 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci} 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_cistatic bool connection_established(struct drbd_connection *connection, 86562306a36Sopenharmony_ci struct socket **sock1, 86662306a36Sopenharmony_ci struct socket **sock2) 86762306a36Sopenharmony_ci{ 86862306a36Sopenharmony_ci struct net_conf *nc; 86962306a36Sopenharmony_ci int timeout; 87062306a36Sopenharmony_ci bool ok; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (!*sock1 || !*sock2) 87362306a36Sopenharmony_ci return false; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci rcu_read_lock(); 87662306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 87762306a36Sopenharmony_ci timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; 87862306a36Sopenharmony_ci rcu_read_unlock(); 87962306a36Sopenharmony_ci schedule_timeout_interruptible(timeout); 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci ok = drbd_socket_okay(sock1); 88262306a36Sopenharmony_ci ok = drbd_socket_okay(sock2) && ok; 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci return ok; 88562306a36Sopenharmony_ci} 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci/* Gets called if a connection is established, or if a new minor gets created 
88862306a36Sopenharmony_ci in a connection */ 88962306a36Sopenharmony_ciint drbd_connected(struct drbd_peer_device *peer_device) 89062306a36Sopenharmony_ci{ 89162306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 89262306a36Sopenharmony_ci int err; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci atomic_set(&device->packet_seq, 0); 89562306a36Sopenharmony_ci device->peer_seq = 0; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 89862306a36Sopenharmony_ci &peer_device->connection->cstate_mutex : 89962306a36Sopenharmony_ci &device->own_state_mutex; 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci err = drbd_send_sync_param(peer_device); 90262306a36Sopenharmony_ci if (!err) 90362306a36Sopenharmony_ci err = drbd_send_sizes(peer_device, 0, 0); 90462306a36Sopenharmony_ci if (!err) 90562306a36Sopenharmony_ci err = drbd_send_uuids(peer_device); 90662306a36Sopenharmony_ci if (!err) 90762306a36Sopenharmony_ci err = drbd_send_current_state(peer_device); 90862306a36Sopenharmony_ci clear_bit(USE_DEGR_WFC_T, &device->flags); 90962306a36Sopenharmony_ci clear_bit(RESIZE_PENDING, &device->flags); 91062306a36Sopenharmony_ci atomic_set(&device->ap_in_flight, 0); 91162306a36Sopenharmony_ci mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ 91262306a36Sopenharmony_ci return err; 91362306a36Sopenharmony_ci} 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci/* 91662306a36Sopenharmony_ci * return values: 91762306a36Sopenharmony_ci * 1 yes, we have a valid connection 91862306a36Sopenharmony_ci * 0 oops, did not work out, please try again 91962306a36Sopenharmony_ci * -1 peer talks different language, 92062306a36Sopenharmony_ci * no point in trying again, please go standalone. 92162306a36Sopenharmony_ci * -2 We do not have a network config... 
 */
static int conn_connect(struct drbd_connection *connection)
{
	/* Establishes the data socket ("sock") and the meta socket ("msock")
	 * with the peer, by actively connecting and listening at the same
	 * time, then runs the feature/auth handshake and announces our
	 * state for every peer device.
	 *
	 * NOTE(review): as coded here, -2 is returned when the state change
	 * to C_WF_CONNECTION is refused, and -1 is also returned from the
	 * out_release_sockets path; a non-positive result of
	 * drbd_do_features() is passed through unchanged. */
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* Temporary drbd_socket wrappers borrowing the connection's
	 * send/receive buffers; the sockets themselves are only published
	 * in connection->data / connection->meta once both are up. */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* Active side: the first successful connect becomes the data
		 * socket, the second one the meta socket. */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* Passive side: the peer tells us with its first packet
		 * which of the two sockets an accepted connection is. */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			/* drop our own sockets if they died in the meantime */
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once:
					 * prefer the accepted socket */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				/* negative error code or unexpected command */
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* coin flip: wait for another incoming
				 * connection, or go on and connect actively */
				if (get_random_u32_below(2))
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* Both sockets are up; the listen socket is no longer needed. */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_use_task_frag = false;
	msock.socket->sk->sk_use_task_frag = false;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	/* NOTE(review): unlike the helpers above, nc is not checked for
	 * NULL before it is dereferenced here - confirm this cannot
	 * happen at this point. */
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	/* From here on the sockets belong to the connection; the early
	 * returns below do not release them in this function. */
	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* Handshake done: switch the data socket to its regular timeouts. */
	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* Walk the peer devices under RCU, but drop the read lock around
	 * the body; a kref on the device is held while the lock is
	 * dropped.  The result of drbd_connected() is not checked here. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

/*
 * decode_header() - parse a received packet header into @pi
 *
 * Which header layout is accepted depends on drbd_header_size() for this
 * connection, and the magic value found must match that layout:
 * p_header100 (32 bit DRBD_MAGIC_100, carries a volume number and a pad
 * field that must be zero), p_header95 (16 bit DRBD_MAGIC_BIG, 32 bit
 * length) or p_header80 (32 bit DRBD_MAGIC, 16 bit length).  The two
 * older layouts carry no volume number, so pi->vnr is set to 0.
 *
 * On success pi->data points to the first byte after the header.
 * Returns 0 on success, -EINVAL on wrong magic or non-zero padding.
 */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

/*
 * If the current task's block plug is this connection's receiver_plug,
 * finish it and start it again right away; otherwise do nothing.
 */
static void drbd_unplug_all_devices(struct drbd_connection *connection)
{
	if (current->plug == &connection->receiver_plug) {
		blk_finish_plug(&connection->receiver_plug);
		blk_start_plug(&connection->receiver_plug);
	} /* else: maybe just schedule() ?? */
}

/*
 * drbd_recv_header() - receive and decode one packet header
 *
 * Reads a complete header from the data socket into
 * connection->data.rbuf and decodes it into @pi.
 * connection->last_received is updated once the header was read, even if
 * decoding it fails.
 *
 * Returns 0 on success, otherwise the error of the receive or of
 * decode_header().
 */
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

/*
 * drbd_recv_header_maybe_unplug() - like drbd_recv_header(), but unplug
 * the backend queues if no header is pending yet
 *
 * First tries a non-blocking read of the header.  If that yields
 * -EAGAIN, quickack is set on the data socket and the receiver plug is
 * flushed before blocking.  Whatever part of the header is still missing
 * is then read with drbd_recv_all_warn().
 *
 * Returns 0 on success, otherwise the error of the blocking receive or
 * of decode_header().
 */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* partial header: keep what we got, read only the rest */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
125062306a36Sopenharmony_ci */ 125162306a36Sopenharmony_cistruct issue_flush_context { 125262306a36Sopenharmony_ci atomic_t pending; 125362306a36Sopenharmony_ci int error; 125462306a36Sopenharmony_ci struct completion done; 125562306a36Sopenharmony_ci}; 125662306a36Sopenharmony_cistruct one_flush_context { 125762306a36Sopenharmony_ci struct drbd_device *device; 125862306a36Sopenharmony_ci struct issue_flush_context *ctx; 125962306a36Sopenharmony_ci}; 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_cistatic void one_flush_endio(struct bio *bio) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci struct one_flush_context *octx = bio->bi_private; 126462306a36Sopenharmony_ci struct drbd_device *device = octx->device; 126562306a36Sopenharmony_ci struct issue_flush_context *ctx = octx->ctx; 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci if (bio->bi_status) { 126862306a36Sopenharmony_ci ctx->error = blk_status_to_errno(bio->bi_status); 126962306a36Sopenharmony_ci drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status); 127062306a36Sopenharmony_ci } 127162306a36Sopenharmony_ci kfree(octx); 127262306a36Sopenharmony_ci bio_put(bio); 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci clear_bit(FLUSH_PENDING, &device->flags); 127562306a36Sopenharmony_ci put_ldev(device); 127662306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci if (atomic_dec_and_test(&ctx->pending)) 127962306a36Sopenharmony_ci complete(&ctx->done); 128062306a36Sopenharmony_ci} 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_cistatic void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) 128362306a36Sopenharmony_ci{ 128462306a36Sopenharmony_ci struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0, 128562306a36Sopenharmony_ci REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO); 128662306a36Sopenharmony_ci struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); 
128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci if (!octx) { 128962306a36Sopenharmony_ci drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n"); 129062306a36Sopenharmony_ci /* FIXME: what else can I do now? disconnecting or detaching 129162306a36Sopenharmony_ci * really does not help to improve the state of the world, either. 129262306a36Sopenharmony_ci */ 129362306a36Sopenharmony_ci bio_put(bio); 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci ctx->error = -ENOMEM; 129662306a36Sopenharmony_ci put_ldev(device); 129762306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 129862306a36Sopenharmony_ci return; 129962306a36Sopenharmony_ci } 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci octx->device = device; 130262306a36Sopenharmony_ci octx->ctx = ctx; 130362306a36Sopenharmony_ci bio->bi_private = octx; 130462306a36Sopenharmony_ci bio->bi_end_io = one_flush_endio; 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci device->flush_jif = jiffies; 130762306a36Sopenharmony_ci set_bit(FLUSH_PENDING, &device->flags); 130862306a36Sopenharmony_ci atomic_inc(&ctx->pending); 130962306a36Sopenharmony_ci submit_bio(bio); 131062306a36Sopenharmony_ci} 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_cistatic void drbd_flush(struct drbd_connection *connection) 131362306a36Sopenharmony_ci{ 131462306a36Sopenharmony_ci if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { 131562306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 131662306a36Sopenharmony_ci struct issue_flush_context ctx; 131762306a36Sopenharmony_ci int vnr; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci atomic_set(&ctx.pending, 1); 132062306a36Sopenharmony_ci ctx.error = 0; 132162306a36Sopenharmony_ci init_completion(&ctx.done); 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci rcu_read_lock(); 132462306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 132562306a36Sopenharmony_ci struct drbd_device 
*device = peer_device->device; 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci if (!get_ldev(device)) 132862306a36Sopenharmony_ci continue; 132962306a36Sopenharmony_ci kref_get(&device->kref); 133062306a36Sopenharmony_ci rcu_read_unlock(); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci submit_one_flush(device, &ctx); 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci rcu_read_lock(); 133562306a36Sopenharmony_ci } 133662306a36Sopenharmony_ci rcu_read_unlock(); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci /* Do we want to add a timeout, 133962306a36Sopenharmony_ci * if disk-timeout is set? */ 134062306a36Sopenharmony_ci if (!atomic_dec_and_test(&ctx.pending)) 134162306a36Sopenharmony_ci wait_for_completion(&ctx.done); 134262306a36Sopenharmony_ci 134362306a36Sopenharmony_ci if (ctx.error) { 134462306a36Sopenharmony_ci /* would rather check on EOPNOTSUPP, but that is not reliable. 134562306a36Sopenharmony_ci * don't try again for ANY return value != 0 134662306a36Sopenharmony_ci * if (rv == -EOPNOTSUPP) */ 134762306a36Sopenharmony_ci /* Any error is already reported by bio_endio callback. */ 134862306a36Sopenharmony_ci drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); 134962306a36Sopenharmony_ci } 135062306a36Sopenharmony_ci } 135162306a36Sopenharmony_ci} 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci/** 135462306a36Sopenharmony_ci * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. 135562306a36Sopenharmony_ci * @connection: DRBD connection. 135662306a36Sopenharmony_ci * @epoch: Epoch object. 135762306a36Sopenharmony_ci * @ev: Epoch event. 
135862306a36Sopenharmony_ci */ 135962306a36Sopenharmony_cistatic enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection, 136062306a36Sopenharmony_ci struct drbd_epoch *epoch, 136162306a36Sopenharmony_ci enum epoch_event ev) 136262306a36Sopenharmony_ci{ 136362306a36Sopenharmony_ci int epoch_size; 136462306a36Sopenharmony_ci struct drbd_epoch *next_epoch; 136562306a36Sopenharmony_ci enum finish_epoch rv = FE_STILL_LIVE; 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci spin_lock(&connection->epoch_lock); 136862306a36Sopenharmony_ci do { 136962306a36Sopenharmony_ci next_epoch = NULL; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci epoch_size = atomic_read(&epoch->epoch_size); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci switch (ev & ~EV_CLEANUP) { 137462306a36Sopenharmony_ci case EV_PUT: 137562306a36Sopenharmony_ci atomic_dec(&epoch->active); 137662306a36Sopenharmony_ci break; 137762306a36Sopenharmony_ci case EV_GOT_BARRIER_NR: 137862306a36Sopenharmony_ci set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); 137962306a36Sopenharmony_ci break; 138062306a36Sopenharmony_ci case EV_BECAME_LAST: 138162306a36Sopenharmony_ci /* nothing to do*/ 138262306a36Sopenharmony_ci break; 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci if (epoch_size != 0 && 138662306a36Sopenharmony_ci atomic_read(&epoch->active) == 0 && 138762306a36Sopenharmony_ci (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { 138862306a36Sopenharmony_ci if (!(ev & EV_CLEANUP)) { 138962306a36Sopenharmony_ci spin_unlock(&connection->epoch_lock); 139062306a36Sopenharmony_ci drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size); 139162306a36Sopenharmony_ci spin_lock(&connection->epoch_lock); 139262306a36Sopenharmony_ci } 139362306a36Sopenharmony_ci#if 0 139462306a36Sopenharmony_ci /* FIXME: dec unacked on connection, once we have 139562306a36Sopenharmony_ci * something to count pending connection packets in. 
*/ 139662306a36Sopenharmony_ci if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) 139762306a36Sopenharmony_ci dec_unacked(epoch->connection); 139862306a36Sopenharmony_ci#endif 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci if (connection->current_epoch != epoch) { 140162306a36Sopenharmony_ci next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); 140262306a36Sopenharmony_ci list_del(&epoch->list); 140362306a36Sopenharmony_ci ev = EV_BECAME_LAST | (ev & EV_CLEANUP); 140462306a36Sopenharmony_ci connection->epochs--; 140562306a36Sopenharmony_ci kfree(epoch); 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci if (rv == FE_STILL_LIVE) 140862306a36Sopenharmony_ci rv = FE_DESTROYED; 140962306a36Sopenharmony_ci } else { 141062306a36Sopenharmony_ci epoch->flags = 0; 141162306a36Sopenharmony_ci atomic_set(&epoch->epoch_size, 0); 141262306a36Sopenharmony_ci /* atomic_set(&epoch->active, 0); is already zero */ 141362306a36Sopenharmony_ci if (rv == FE_STILL_LIVE) 141462306a36Sopenharmony_ci rv = FE_RECYCLED; 141562306a36Sopenharmony_ci } 141662306a36Sopenharmony_ci } 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci if (!next_epoch) 141962306a36Sopenharmony_ci break; 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci epoch = next_epoch; 142262306a36Sopenharmony_ci } while (1); 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci spin_unlock(&connection->epoch_lock); 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci return rv; 142762306a36Sopenharmony_ci} 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_cistatic enum write_ordering_e 143062306a36Sopenharmony_cimax_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) 143162306a36Sopenharmony_ci{ 143262306a36Sopenharmony_ci struct disk_conf *dc; 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci dc = rcu_dereference(bdev->disk_conf); 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci if (wo == WO_BDEV_FLUSH && !dc->disk_flushes) 
143762306a36Sopenharmony_ci wo = WO_DRAIN_IO; 143862306a36Sopenharmony_ci if (wo == WO_DRAIN_IO && !dc->disk_drain) 143962306a36Sopenharmony_ci wo = WO_NONE; 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci return wo; 144262306a36Sopenharmony_ci} 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci/* 144562306a36Sopenharmony_ci * drbd_bump_write_ordering() - Fall back to an other write ordering method 144662306a36Sopenharmony_ci * @wo: Write ordering method to try. 144762306a36Sopenharmony_ci */ 144862306a36Sopenharmony_civoid drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, 144962306a36Sopenharmony_ci enum write_ordering_e wo) 145062306a36Sopenharmony_ci{ 145162306a36Sopenharmony_ci struct drbd_device *device; 145262306a36Sopenharmony_ci enum write_ordering_e pwo; 145362306a36Sopenharmony_ci int vnr; 145462306a36Sopenharmony_ci static char *write_ordering_str[] = { 145562306a36Sopenharmony_ci [WO_NONE] = "none", 145662306a36Sopenharmony_ci [WO_DRAIN_IO] = "drain", 145762306a36Sopenharmony_ci [WO_BDEV_FLUSH] = "flush", 145862306a36Sopenharmony_ci }; 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci pwo = resource->write_ordering; 146162306a36Sopenharmony_ci if (wo != WO_BDEV_FLUSH) 146262306a36Sopenharmony_ci wo = min(pwo, wo); 146362306a36Sopenharmony_ci rcu_read_lock(); 146462306a36Sopenharmony_ci idr_for_each_entry(&resource->devices, device, vnr) { 146562306a36Sopenharmony_ci if (get_ldev(device)) { 146662306a36Sopenharmony_ci wo = max_allowed_wo(device->ldev, wo); 146762306a36Sopenharmony_ci if (device->ldev == bdev) 146862306a36Sopenharmony_ci bdev = NULL; 146962306a36Sopenharmony_ci put_ldev(device); 147062306a36Sopenharmony_ci } 147162306a36Sopenharmony_ci } 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci if (bdev) 147462306a36Sopenharmony_ci wo = max_allowed_wo(bdev, wo); 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci rcu_read_unlock(); 147762306a36Sopenharmony_ci 
147862306a36Sopenharmony_ci resource->write_ordering = wo; 147962306a36Sopenharmony_ci if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH) 148062306a36Sopenharmony_ci drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); 148162306a36Sopenharmony_ci} 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci/* 148462306a36Sopenharmony_ci * Mapping "discard" to ZEROOUT with UNMAP does not work for us: 148562306a36Sopenharmony_ci * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it 148662306a36Sopenharmony_ci * will directly go to fallback mode, submitting normal writes, and 148762306a36Sopenharmony_ci * never even try to UNMAP. 148862306a36Sopenharmony_ci * 148962306a36Sopenharmony_ci * And dm-thin does not do this (yet), mostly because in general it has 149062306a36Sopenharmony_ci * to assume that "skip_block_zeroing" is set. See also: 149162306a36Sopenharmony_ci * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html 149262306a36Sopenharmony_ci * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html 149362306a36Sopenharmony_ci * 149462306a36Sopenharmony_ci * We *may* ignore the discard-zeroes-data setting, if so configured. 149562306a36Sopenharmony_ci * 149662306a36Sopenharmony_ci * Assumption is that this "discard_zeroes_data=0" is only because the backend 149762306a36Sopenharmony_ci * may ignore partial unaligned discards. 149862306a36Sopenharmony_ci * 149962306a36Sopenharmony_ci * LVM/DM thin as of at least 150062306a36Sopenharmony_ci * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) 150162306a36Sopenharmony_ci * Library version: 1.02.93-RHEL7 (2015-01-28) 150262306a36Sopenharmony_ci * Driver version: 4.29.0 150362306a36Sopenharmony_ci * still behaves this way. 150462306a36Sopenharmony_ci * 150562306a36Sopenharmony_ci * For unaligned (wrt. 
alignment and granularity) or too small discards, 150662306a36Sopenharmony_ci * we zero-out the initial (and/or) trailing unaligned partial chunks, 150762306a36Sopenharmony_ci * but discard all the aligned full chunks. 150862306a36Sopenharmony_ci * 150962306a36Sopenharmony_ci * At least for LVM/DM thin, with skip_block_zeroing=false, 151062306a36Sopenharmony_ci * the result is effectively "discard_zeroes_data=1". 151162306a36Sopenharmony_ci */ 151262306a36Sopenharmony_ci/* flags: EE_TRIM|EE_ZEROOUT */ 151362306a36Sopenharmony_ciint drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags) 151462306a36Sopenharmony_ci{ 151562306a36Sopenharmony_ci struct block_device *bdev = device->ldev->backing_bdev; 151662306a36Sopenharmony_ci sector_t tmp, nr; 151762306a36Sopenharmony_ci unsigned int max_discard_sectors, granularity; 151862306a36Sopenharmony_ci int alignment; 151962306a36Sopenharmony_ci int err = 0; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM)) 152262306a36Sopenharmony_ci goto zero_out; 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci /* Zero-sector (unknown) and one-sector granularities are the same. 
*/ 152562306a36Sopenharmony_ci granularity = max(bdev_discard_granularity(bdev) >> 9, 1U); 152662306a36Sopenharmony_ci alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22)); 152962306a36Sopenharmony_ci max_discard_sectors -= max_discard_sectors % granularity; 153062306a36Sopenharmony_ci if (unlikely(!max_discard_sectors)) 153162306a36Sopenharmony_ci goto zero_out; 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci if (nr_sectors < granularity) 153462306a36Sopenharmony_ci goto zero_out; 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci tmp = start; 153762306a36Sopenharmony_ci if (sector_div(tmp, granularity) != alignment) { 153862306a36Sopenharmony_ci if (nr_sectors < 2*granularity) 153962306a36Sopenharmony_ci goto zero_out; 154062306a36Sopenharmony_ci /* start + gran - (start + gran - align) % gran */ 154162306a36Sopenharmony_ci tmp = start + granularity - alignment; 154262306a36Sopenharmony_ci tmp = start + granularity - sector_div(tmp, granularity); 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci nr = tmp - start; 154562306a36Sopenharmony_ci /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many 154662306a36Sopenharmony_ci * layers are below us, some may have smaller granularity */ 154762306a36Sopenharmony_ci err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); 154862306a36Sopenharmony_ci nr_sectors -= nr; 154962306a36Sopenharmony_ci start = tmp; 155062306a36Sopenharmony_ci } 155162306a36Sopenharmony_ci while (nr_sectors >= max_discard_sectors) { 155262306a36Sopenharmony_ci err |= blkdev_issue_discard(bdev, start, max_discard_sectors, 155362306a36Sopenharmony_ci GFP_NOIO); 155462306a36Sopenharmony_ci nr_sectors -= max_discard_sectors; 155562306a36Sopenharmony_ci start += max_discard_sectors; 155662306a36Sopenharmony_ci } 155762306a36Sopenharmony_ci if (nr_sectors) { 155862306a36Sopenharmony_ci /* 
max_discard_sectors is unsigned int (and a multiple of 155962306a36Sopenharmony_ci * granularity, we made sure of that above already); 156062306a36Sopenharmony_ci * nr is < max_discard_sectors; 156162306a36Sopenharmony_ci * I don't need sector_div here, even though nr is sector_t */ 156262306a36Sopenharmony_ci nr = nr_sectors; 156362306a36Sopenharmony_ci nr -= (unsigned int)nr % granularity; 156462306a36Sopenharmony_ci if (nr) { 156562306a36Sopenharmony_ci err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO); 156662306a36Sopenharmony_ci nr_sectors -= nr; 156762306a36Sopenharmony_ci start += nr; 156862306a36Sopenharmony_ci } 156962306a36Sopenharmony_ci } 157062306a36Sopenharmony_ci zero_out: 157162306a36Sopenharmony_ci if (nr_sectors) { 157262306a36Sopenharmony_ci err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 157362306a36Sopenharmony_ci (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP); 157462306a36Sopenharmony_ci } 157562306a36Sopenharmony_ci return err != 0; 157662306a36Sopenharmony_ci} 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_cistatic bool can_do_reliable_discards(struct drbd_device *device) 157962306a36Sopenharmony_ci{ 158062306a36Sopenharmony_ci struct disk_conf *dc; 158162306a36Sopenharmony_ci bool can_do; 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci if (!bdev_max_discard_sectors(device->ldev->backing_bdev)) 158462306a36Sopenharmony_ci return false; 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_ci rcu_read_lock(); 158762306a36Sopenharmony_ci dc = rcu_dereference(device->ldev->disk_conf); 158862306a36Sopenharmony_ci can_do = dc->discard_zeroes_if_aligned; 158962306a36Sopenharmony_ci rcu_read_unlock(); 159062306a36Sopenharmony_ci return can_do; 159162306a36Sopenharmony_ci} 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_cistatic void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req) 159462306a36Sopenharmony_ci{ 159562306a36Sopenharmony_ci /* If the backend cannot 
discard, or does not guarantee 159662306a36Sopenharmony_ci * read-back zeroes in discarded ranges, we fall back to 159762306a36Sopenharmony_ci * zero-out. Unless configuration specifically requested 159862306a36Sopenharmony_ci * otherwise. */ 159962306a36Sopenharmony_ci if (!can_do_reliable_discards(device)) 160062306a36Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, 160362306a36Sopenharmony_ci peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM))) 160462306a36Sopenharmony_ci peer_req->flags |= EE_WAS_ERROR; 160562306a36Sopenharmony_ci drbd_endio_write_sec_final(peer_req); 160662306a36Sopenharmony_ci} 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_cistatic int peer_request_fault_type(struct drbd_peer_request *peer_req) 160962306a36Sopenharmony_ci{ 161062306a36Sopenharmony_ci if (peer_req_op(peer_req) == REQ_OP_READ) { 161162306a36Sopenharmony_ci return peer_req->flags & EE_APPLICATION ? 161262306a36Sopenharmony_ci DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD; 161362306a36Sopenharmony_ci } else { 161462306a36Sopenharmony_ci return peer_req->flags & EE_APPLICATION ? 161562306a36Sopenharmony_ci DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR; 161662306a36Sopenharmony_ci } 161762306a36Sopenharmony_ci} 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci/** 162062306a36Sopenharmony_ci * drbd_submit_peer_request() 162162306a36Sopenharmony_ci * @peer_req: peer request 162262306a36Sopenharmony_ci * 162362306a36Sopenharmony_ci * May spread the pages to multiple bios, 162462306a36Sopenharmony_ci * depending on bio_add_page restrictions. 162562306a36Sopenharmony_ci * 162662306a36Sopenharmony_ci * Returns 0 if all bios have been submitted, 162762306a36Sopenharmony_ci * -ENOMEM if we could not allocate enough bios, 162862306a36Sopenharmony_ci * -ENOSPC (any better suggestion?) 
if we have not been able to bio_add_page a 162962306a36Sopenharmony_ci * single page to an empty bio (which should never happen and likely indicates 163062306a36Sopenharmony_ci * that the lower level IO stack is in some way broken). This has been observed 163162306a36Sopenharmony_ci * on certain Xen deployments. 163262306a36Sopenharmony_ci */ 163362306a36Sopenharmony_ci/* TODO allocate from our own bio_set. */ 163462306a36Sopenharmony_ciint drbd_submit_peer_request(struct drbd_peer_request *peer_req) 163562306a36Sopenharmony_ci{ 163662306a36Sopenharmony_ci struct drbd_device *device = peer_req->peer_device->device; 163762306a36Sopenharmony_ci struct bio *bios = NULL; 163862306a36Sopenharmony_ci struct bio *bio; 163962306a36Sopenharmony_ci struct page *page = peer_req->pages; 164062306a36Sopenharmony_ci sector_t sector = peer_req->i.sector; 164162306a36Sopenharmony_ci unsigned int data_size = peer_req->i.size; 164262306a36Sopenharmony_ci unsigned int n_bios = 0; 164362306a36Sopenharmony_ci unsigned int nr_pages = PFN_UP(data_size); 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci /* TRIM/DISCARD: for now, always use the helper function 164662306a36Sopenharmony_ci * blkdev_issue_zeroout(..., discard=true). 164762306a36Sopenharmony_ci * It's synchronous, but it does the right thing wrt. bio splitting. 164862306a36Sopenharmony_ci * Correctness first, performance later. Next step is to code an 164962306a36Sopenharmony_ci * asynchronous variant of the same. 165062306a36Sopenharmony_ci */ 165162306a36Sopenharmony_ci if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) { 165262306a36Sopenharmony_ci /* wait for all pending IO completions, before we start 165362306a36Sopenharmony_ci * zeroing things out. 
*/ 165462306a36Sopenharmony_ci conn_wait_active_ee_empty(peer_req->peer_device->connection); 165562306a36Sopenharmony_ci /* add it to the active list now, 165662306a36Sopenharmony_ci * so we can find it to present it in debugfs */ 165762306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 165862306a36Sopenharmony_ci peer_req->flags |= EE_SUBMITTED; 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci /* If this was a resync request from receive_rs_deallocated(), 166162306a36Sopenharmony_ci * it is already on the sync_ee list */ 166262306a36Sopenharmony_ci if (list_empty(&peer_req->w.list)) { 166362306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 166462306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->active_ee); 166562306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_ci drbd_issue_peer_discard_or_zero_out(device, peer_req); 166962306a36Sopenharmony_ci return 0; 167062306a36Sopenharmony_ci } 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci /* In most cases, we will only need one bio. But in case the lower 167362306a36Sopenharmony_ci * level restrictions happen to be different at this offset on this 167462306a36Sopenharmony_ci * side than those of the sending peer, we may need to submit the 167562306a36Sopenharmony_ci * request in more than one bio. 167662306a36Sopenharmony_ci * 167762306a36Sopenharmony_ci * Plain bio_alloc is good enough here, this is no DRBD internally 167862306a36Sopenharmony_ci * generated bio, but a bio allocated on behalf of the peer. 167962306a36Sopenharmony_ci */ 168062306a36Sopenharmony_cinext_bio: 168162306a36Sopenharmony_ci /* _DISCARD, _WRITE_ZEROES handled above. 168262306a36Sopenharmony_ci * REQ_OP_FLUSH (empty flush) not expected, 168362306a36Sopenharmony_ci * should have been mapped to a "drbd protocol barrier". 
168462306a36Sopenharmony_ci * REQ_OP_SECURE_ERASE: I don't see how we could ever support that. 168562306a36Sopenharmony_ci */ 168662306a36Sopenharmony_ci if (!(peer_req_op(peer_req) == REQ_OP_WRITE || 168762306a36Sopenharmony_ci peer_req_op(peer_req) == REQ_OP_READ)) { 168862306a36Sopenharmony_ci drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf); 168962306a36Sopenharmony_ci return -EINVAL; 169062306a36Sopenharmony_ci } 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO); 169362306a36Sopenharmony_ci /* > peer_req->i.sector, unless this is the first bio */ 169462306a36Sopenharmony_ci bio->bi_iter.bi_sector = sector; 169562306a36Sopenharmony_ci bio->bi_private = peer_req; 169662306a36Sopenharmony_ci bio->bi_end_io = drbd_peer_request_endio; 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci bio->bi_next = bios; 169962306a36Sopenharmony_ci bios = bio; 170062306a36Sopenharmony_ci ++n_bios; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci page_chain_for_each(page) { 170362306a36Sopenharmony_ci unsigned len = min_t(unsigned, data_size, PAGE_SIZE); 170462306a36Sopenharmony_ci if (!bio_add_page(bio, page, len, 0)) 170562306a36Sopenharmony_ci goto next_bio; 170662306a36Sopenharmony_ci data_size -= len; 170762306a36Sopenharmony_ci sector += len >> 9; 170862306a36Sopenharmony_ci --nr_pages; 170962306a36Sopenharmony_ci } 171062306a36Sopenharmony_ci D_ASSERT(device, data_size == 0); 171162306a36Sopenharmony_ci D_ASSERT(device, page == NULL); 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci atomic_set(&peer_req->pending_bios, n_bios); 171462306a36Sopenharmony_ci /* for debugfs: update timestamp, mark as submitted */ 171562306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 171662306a36Sopenharmony_ci peer_req->flags |= EE_SUBMITTED; 171762306a36Sopenharmony_ci do { 171862306a36Sopenharmony_ci bio = bios; 171962306a36Sopenharmony_ci bios = bios->bi_next; 
172062306a36Sopenharmony_ci bio->bi_next = NULL; 172162306a36Sopenharmony_ci 172262306a36Sopenharmony_ci drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio); 172362306a36Sopenharmony_ci } while (bios); 172462306a36Sopenharmony_ci return 0; 172562306a36Sopenharmony_ci} 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_cistatic void drbd_remove_epoch_entry_interval(struct drbd_device *device, 172862306a36Sopenharmony_ci struct drbd_peer_request *peer_req) 172962306a36Sopenharmony_ci{ 173062306a36Sopenharmony_ci struct drbd_interval *i = &peer_req->i; 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci drbd_remove_interval(&device->write_requests, i); 173362306a36Sopenharmony_ci drbd_clear_interval(i); 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci /* Wake up any processes waiting for this peer request to complete. */ 173662306a36Sopenharmony_ci if (i->waiting) 173762306a36Sopenharmony_ci wake_up(&device->misc_wait); 173862306a36Sopenharmony_ci} 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_cistatic void conn_wait_active_ee_empty(struct drbd_connection *connection) 174162306a36Sopenharmony_ci{ 174262306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 174362306a36Sopenharmony_ci int vnr; 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_ci rcu_read_lock(); 174662306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 174762306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_ci kref_get(&device->kref); 175062306a36Sopenharmony_ci rcu_read_unlock(); 175162306a36Sopenharmony_ci drbd_wait_ee_list_empty(device, &device->active_ee); 175262306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 175362306a36Sopenharmony_ci rcu_read_lock(); 175462306a36Sopenharmony_ci } 175562306a36Sopenharmony_ci rcu_read_unlock(); 175662306a36Sopenharmony_ci} 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_cistatic int 
receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) 175962306a36Sopenharmony_ci{ 176062306a36Sopenharmony_ci int rv; 176162306a36Sopenharmony_ci struct p_barrier *p = pi->data; 176262306a36Sopenharmony_ci struct drbd_epoch *epoch; 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci /* FIXME these are unacked on connection, 176562306a36Sopenharmony_ci * not a specific (peer)device. 176662306a36Sopenharmony_ci */ 176762306a36Sopenharmony_ci connection->current_epoch->barrier_nr = p->barrier; 176862306a36Sopenharmony_ci connection->current_epoch->connection = connection; 176962306a36Sopenharmony_ci rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR); 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_ci /* P_BARRIER_ACK may imply that the corresponding extent is dropped from 177262306a36Sopenharmony_ci * the activity log, which means it would not be resynced in case the 177362306a36Sopenharmony_ci * R_PRIMARY crashes now. 177462306a36Sopenharmony_ci * Therefore we must send the barrier_ack after the barrier request was 177562306a36Sopenharmony_ci * completed. */ 177662306a36Sopenharmony_ci switch (connection->resource->write_ordering) { 177762306a36Sopenharmony_ci case WO_NONE: 177862306a36Sopenharmony_ci if (rv == FE_RECYCLED) 177962306a36Sopenharmony_ci return 0; 178062306a36Sopenharmony_ci 178162306a36Sopenharmony_ci /* receiver context, in the writeout path of the other node. 
178262306a36Sopenharmony_ci * avoid potential distributed deadlock */ 178362306a36Sopenharmony_ci epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); 178462306a36Sopenharmony_ci if (epoch) 178562306a36Sopenharmony_ci break; 178662306a36Sopenharmony_ci else 178762306a36Sopenharmony_ci drbd_warn(connection, "Allocation of an epoch failed, slowing down\n"); 178862306a36Sopenharmony_ci fallthrough; 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ci case WO_BDEV_FLUSH: 179162306a36Sopenharmony_ci case WO_DRAIN_IO: 179262306a36Sopenharmony_ci conn_wait_active_ee_empty(connection); 179362306a36Sopenharmony_ci drbd_flush(connection); 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci if (atomic_read(&connection->current_epoch->epoch_size)) { 179662306a36Sopenharmony_ci epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); 179762306a36Sopenharmony_ci if (epoch) 179862306a36Sopenharmony_ci break; 179962306a36Sopenharmony_ci } 180062306a36Sopenharmony_ci 180162306a36Sopenharmony_ci return 0; 180262306a36Sopenharmony_ci default: 180362306a36Sopenharmony_ci drbd_err(connection, "Strangeness in connection->write_ordering %d\n", 180462306a36Sopenharmony_ci connection->resource->write_ordering); 180562306a36Sopenharmony_ci return -EIO; 180662306a36Sopenharmony_ci } 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci epoch->flags = 0; 180962306a36Sopenharmony_ci atomic_set(&epoch->epoch_size, 0); 181062306a36Sopenharmony_ci atomic_set(&epoch->active, 0); 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci spin_lock(&connection->epoch_lock); 181362306a36Sopenharmony_ci if (atomic_read(&connection->current_epoch->epoch_size)) { 181462306a36Sopenharmony_ci list_add(&epoch->list, &connection->current_epoch->list); 181562306a36Sopenharmony_ci connection->current_epoch = epoch; 181662306a36Sopenharmony_ci connection->epochs++; 181762306a36Sopenharmony_ci } else { 181862306a36Sopenharmony_ci /* The current_epoch got recycled while we allocated this one... 
*/ 181962306a36Sopenharmony_ci kfree(epoch); 182062306a36Sopenharmony_ci } 182162306a36Sopenharmony_ci spin_unlock(&connection->epoch_lock); 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_ci return 0; 182462306a36Sopenharmony_ci} 182562306a36Sopenharmony_ci 182662306a36Sopenharmony_ci/* quick wrapper in case payload size != request_size (write same) */ 182762306a36Sopenharmony_cistatic void drbd_csum_ee_size(struct crypto_shash *h, 182862306a36Sopenharmony_ci struct drbd_peer_request *r, void *d, 182962306a36Sopenharmony_ci unsigned int payload_size) 183062306a36Sopenharmony_ci{ 183162306a36Sopenharmony_ci unsigned int tmp = r->i.size; 183262306a36Sopenharmony_ci r->i.size = payload_size; 183362306a36Sopenharmony_ci drbd_csum_ee(h, r, d); 183462306a36Sopenharmony_ci r->i.size = tmp; 183562306a36Sopenharmony_ci} 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci/* used from receive_RSDataReply (recv_resync_read) 183862306a36Sopenharmony_ci * and from receive_Data. 183962306a36Sopenharmony_ci * data_size: actual payload ("data in") 184062306a36Sopenharmony_ci * for normal writes that is bi_size. 184162306a36Sopenharmony_ci * for discards, that is zero. 184262306a36Sopenharmony_ci * for write same, it is logical_block_size. 184362306a36Sopenharmony_ci * both trim and write same have the bi_size ("data len to be affected") 184462306a36Sopenharmony_ci * as extra argument in the packet header. 
184562306a36Sopenharmony_ci */ 184662306a36Sopenharmony_cistatic struct drbd_peer_request * 184762306a36Sopenharmony_ciread_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 184862306a36Sopenharmony_ci struct packet_info *pi) __must_hold(local) 184962306a36Sopenharmony_ci{ 185062306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 185162306a36Sopenharmony_ci const sector_t capacity = get_capacity(device->vdisk); 185262306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 185362306a36Sopenharmony_ci struct page *page; 185462306a36Sopenharmony_ci int digest_size, err; 185562306a36Sopenharmony_ci unsigned int data_size = pi->size, ds; 185662306a36Sopenharmony_ci void *dig_in = peer_device->connection->int_dig_in; 185762306a36Sopenharmony_ci void *dig_vv = peer_device->connection->int_dig_vv; 185862306a36Sopenharmony_ci unsigned long *data; 185962306a36Sopenharmony_ci struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; 186062306a36Sopenharmony_ci struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL; 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci digest_size = 0; 186362306a36Sopenharmony_ci if (!trim && peer_device->connection->peer_integrity_tfm) { 186462306a36Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); 186562306a36Sopenharmony_ci /* 186662306a36Sopenharmony_ci * FIXME: Receive the incoming digest into the receive buffer 186762306a36Sopenharmony_ci * here, together with its struct p_data? 186862306a36Sopenharmony_ci */ 186962306a36Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); 187062306a36Sopenharmony_ci if (err) 187162306a36Sopenharmony_ci return NULL; 187262306a36Sopenharmony_ci data_size -= digest_size; 187362306a36Sopenharmony_ci } 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci /* assume request_size == data_size, but special case trim. 
*/ 187662306a36Sopenharmony_ci ds = data_size; 187762306a36Sopenharmony_ci if (trim) { 187862306a36Sopenharmony_ci if (!expect(peer_device, data_size == 0)) 187962306a36Sopenharmony_ci return NULL; 188062306a36Sopenharmony_ci ds = be32_to_cpu(trim->size); 188162306a36Sopenharmony_ci } else if (zeroes) { 188262306a36Sopenharmony_ci if (!expect(peer_device, data_size == 0)) 188362306a36Sopenharmony_ci return NULL; 188462306a36Sopenharmony_ci ds = be32_to_cpu(zeroes->size); 188562306a36Sopenharmony_ci } 188662306a36Sopenharmony_ci 188762306a36Sopenharmony_ci if (!expect(peer_device, IS_ALIGNED(ds, 512))) 188862306a36Sopenharmony_ci return NULL; 188962306a36Sopenharmony_ci if (trim || zeroes) { 189062306a36Sopenharmony_ci if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9))) 189162306a36Sopenharmony_ci return NULL; 189262306a36Sopenharmony_ci } else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE)) 189362306a36Sopenharmony_ci return NULL; 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci /* even though we trust out peer, 189662306a36Sopenharmony_ci * we sometimes have to double check. */ 189762306a36Sopenharmony_ci if (sector + (ds>>9) > capacity) { 189862306a36Sopenharmony_ci drbd_err(device, "request from peer beyond end of local disk: " 189962306a36Sopenharmony_ci "capacity: %llus < sector: %llus + size: %u\n", 190062306a36Sopenharmony_ci (unsigned long long)capacity, 190162306a36Sopenharmony_ci (unsigned long long)sector, ds); 190262306a36Sopenharmony_ci return NULL; 190362306a36Sopenharmony_ci } 190462306a36Sopenharmony_ci 190562306a36Sopenharmony_ci /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 190662306a36Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 190762306a36Sopenharmony_ci * which in turn might block on the other node at this very place. 
*/ 190862306a36Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO); 190962306a36Sopenharmony_ci if (!peer_req) 191062306a36Sopenharmony_ci return NULL; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci peer_req->flags |= EE_WRITE; 191362306a36Sopenharmony_ci if (trim) { 191462306a36Sopenharmony_ci peer_req->flags |= EE_TRIM; 191562306a36Sopenharmony_ci return peer_req; 191662306a36Sopenharmony_ci } 191762306a36Sopenharmony_ci if (zeroes) { 191862306a36Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 191962306a36Sopenharmony_ci return peer_req; 192062306a36Sopenharmony_ci } 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_ci /* receive payload size bytes into page chain */ 192362306a36Sopenharmony_ci ds = data_size; 192462306a36Sopenharmony_ci page = peer_req->pages; 192562306a36Sopenharmony_ci page_chain_for_each(page) { 192662306a36Sopenharmony_ci unsigned len = min_t(int, ds, PAGE_SIZE); 192762306a36Sopenharmony_ci data = kmap(page); 192862306a36Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, data, len); 192962306a36Sopenharmony_ci if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) { 193062306a36Sopenharmony_ci drbd_err(device, "Fault injection: Corrupting data on receive\n"); 193162306a36Sopenharmony_ci data[0] = data[0] ^ (unsigned long)-1; 193262306a36Sopenharmony_ci } 193362306a36Sopenharmony_ci kunmap(page); 193462306a36Sopenharmony_ci if (err) { 193562306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 193662306a36Sopenharmony_ci return NULL; 193762306a36Sopenharmony_ci } 193862306a36Sopenharmony_ci ds -= len; 193962306a36Sopenharmony_ci } 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci if (digest_size) { 194262306a36Sopenharmony_ci drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size); 194362306a36Sopenharmony_ci if (memcmp(dig_in, dig_vv, digest_size)) { 194462306a36Sopenharmony_ci drbd_err(device, "Digest integrity check FAILED: 
%llus +%u\n", 194562306a36Sopenharmony_ci (unsigned long long)sector, data_size); 194662306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 194762306a36Sopenharmony_ci return NULL; 194862306a36Sopenharmony_ci } 194962306a36Sopenharmony_ci } 195062306a36Sopenharmony_ci device->recv_cnt += data_size >> 9; 195162306a36Sopenharmony_ci return peer_req; 195262306a36Sopenharmony_ci} 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_ci/* drbd_drain_block() just takes a data block 195562306a36Sopenharmony_ci * out of the socket input buffer, and discards it. 195662306a36Sopenharmony_ci */ 195762306a36Sopenharmony_cistatic int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) 195862306a36Sopenharmony_ci{ 195962306a36Sopenharmony_ci struct page *page; 196062306a36Sopenharmony_ci int err = 0; 196162306a36Sopenharmony_ci void *data; 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci if (!data_size) 196462306a36Sopenharmony_ci return 0; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci page = drbd_alloc_pages(peer_device, 1, 1); 196762306a36Sopenharmony_ci 196862306a36Sopenharmony_ci data = kmap(page); 196962306a36Sopenharmony_ci while (data_size) { 197062306a36Sopenharmony_ci unsigned int len = min_t(int, data_size, PAGE_SIZE); 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, data, len); 197362306a36Sopenharmony_ci if (err) 197462306a36Sopenharmony_ci break; 197562306a36Sopenharmony_ci data_size -= len; 197662306a36Sopenharmony_ci } 197762306a36Sopenharmony_ci kunmap(page); 197862306a36Sopenharmony_ci drbd_free_pages(peer_device->device, page, 0); 197962306a36Sopenharmony_ci return err; 198062306a36Sopenharmony_ci} 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_cistatic int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req, 198362306a36Sopenharmony_ci sector_t sector, int data_size) 198462306a36Sopenharmony_ci{ 198562306a36Sopenharmony_ci struct 
bio_vec bvec; 198662306a36Sopenharmony_ci struct bvec_iter iter; 198762306a36Sopenharmony_ci struct bio *bio; 198862306a36Sopenharmony_ci int digest_size, err, expect; 198962306a36Sopenharmony_ci void *dig_in = peer_device->connection->int_dig_in; 199062306a36Sopenharmony_ci void *dig_vv = peer_device->connection->int_dig_vv; 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci digest_size = 0; 199362306a36Sopenharmony_ci if (peer_device->connection->peer_integrity_tfm) { 199462306a36Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); 199562306a36Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); 199662306a36Sopenharmony_ci if (err) 199762306a36Sopenharmony_ci return err; 199862306a36Sopenharmony_ci data_size -= digest_size; 199962306a36Sopenharmony_ci } 200062306a36Sopenharmony_ci 200162306a36Sopenharmony_ci /* optimistically update recv_cnt. if receiving fails below, 200262306a36Sopenharmony_ci * we disconnect anyways, and counters will be reset. 
*/ 200362306a36Sopenharmony_ci peer_device->device->recv_cnt += data_size>>9; 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci bio = req->master_bio; 200662306a36Sopenharmony_ci D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector); 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 200962306a36Sopenharmony_ci void *mapped = bvec_kmap_local(&bvec); 201062306a36Sopenharmony_ci expect = min_t(int, data_size, bvec.bv_len); 201162306a36Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, mapped, expect); 201262306a36Sopenharmony_ci kunmap_local(mapped); 201362306a36Sopenharmony_ci if (err) 201462306a36Sopenharmony_ci return err; 201562306a36Sopenharmony_ci data_size -= expect; 201662306a36Sopenharmony_ci } 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ci if (digest_size) { 201962306a36Sopenharmony_ci drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv); 202062306a36Sopenharmony_ci if (memcmp(dig_in, dig_vv, digest_size)) { 202162306a36Sopenharmony_ci drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n"); 202262306a36Sopenharmony_ci return -EINVAL; 202362306a36Sopenharmony_ci } 202462306a36Sopenharmony_ci } 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci D_ASSERT(peer_device->device, data_size == 0); 202762306a36Sopenharmony_ci return 0; 202862306a36Sopenharmony_ci} 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci/* 203162306a36Sopenharmony_ci * e_end_resync_block() is called in ack_sender context via 203262306a36Sopenharmony_ci * drbd_finish_peer_reqs(). 
203362306a36Sopenharmony_ci */ 203462306a36Sopenharmony_cistatic int e_end_resync_block(struct drbd_work *w, int unused) 203562306a36Sopenharmony_ci{ 203662306a36Sopenharmony_ci struct drbd_peer_request *peer_req = 203762306a36Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 203862306a36Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 203962306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 204062306a36Sopenharmony_ci sector_t sector = peer_req->i.sector; 204162306a36Sopenharmony_ci int err; 204262306a36Sopenharmony_ci 204362306a36Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 204462306a36Sopenharmony_ci 204562306a36Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 204662306a36Sopenharmony_ci drbd_set_in_sync(peer_device, sector, peer_req->i.size); 204762306a36Sopenharmony_ci err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); 204862306a36Sopenharmony_ci } else { 204962306a36Sopenharmony_ci /* Record failure to sync */ 205062306a36Sopenharmony_ci drbd_rs_failed_io(peer_device, sector, peer_req->i.size); 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 205362306a36Sopenharmony_ci } 205462306a36Sopenharmony_ci dec_unacked(device); 205562306a36Sopenharmony_ci 205662306a36Sopenharmony_ci return err; 205762306a36Sopenharmony_ci} 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_cistatic int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, 206062306a36Sopenharmony_ci struct packet_info *pi) __releases(local) 206162306a36Sopenharmony_ci{ 206262306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 206362306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi); 206662306a36Sopenharmony_ci if (!peer_req) 206762306a36Sopenharmony_ci goto fail; 
206862306a36Sopenharmony_ci 206962306a36Sopenharmony_ci dec_rs_pending(peer_device); 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_ci inc_unacked(device); 207262306a36Sopenharmony_ci /* corresponding dec_unacked() in e_end_resync_block() 207362306a36Sopenharmony_ci * respective _drbd_clear_done_ee */ 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci peer_req->w.cb = e_end_resync_block; 207662306a36Sopenharmony_ci peer_req->opf = REQ_OP_WRITE; 207762306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 208062306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->sync_ee); 208162306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_ev); 208462306a36Sopenharmony_ci if (drbd_submit_peer_request(peer_req) == 0) 208562306a36Sopenharmony_ci return 0; 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci /* don't care for the reason here */ 208862306a36Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 208962306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 209062306a36Sopenharmony_ci list_del(&peer_req->w.list); 209162306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 209462306a36Sopenharmony_cifail: 209562306a36Sopenharmony_ci put_ldev(device); 209662306a36Sopenharmony_ci return -EIO; 209762306a36Sopenharmony_ci} 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_cistatic struct drbd_request * 210062306a36Sopenharmony_cifind_request(struct drbd_device *device, struct rb_root *root, u64 id, 210162306a36Sopenharmony_ci sector_t sector, bool missing_ok, const char *func) 210262306a36Sopenharmony_ci{ 210362306a36Sopenharmony_ci struct drbd_request *req; 210462306a36Sopenharmony_ci 
210562306a36Sopenharmony_ci /* Request object according to our peer */ 210662306a36Sopenharmony_ci req = (struct drbd_request *)(unsigned long)id; 210762306a36Sopenharmony_ci if (drbd_contains_interval(root, sector, &req->i) && req->i.local) 210862306a36Sopenharmony_ci return req; 210962306a36Sopenharmony_ci if (!missing_ok) { 211062306a36Sopenharmony_ci drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func, 211162306a36Sopenharmony_ci (unsigned long)id, (unsigned long long)sector); 211262306a36Sopenharmony_ci } 211362306a36Sopenharmony_ci return NULL; 211462306a36Sopenharmony_ci} 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_cistatic int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi) 211762306a36Sopenharmony_ci{ 211862306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 211962306a36Sopenharmony_ci struct drbd_device *device; 212062306a36Sopenharmony_ci struct drbd_request *req; 212162306a36Sopenharmony_ci sector_t sector; 212262306a36Sopenharmony_ci int err; 212362306a36Sopenharmony_ci struct p_data *p = pi->data; 212462306a36Sopenharmony_ci 212562306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 212662306a36Sopenharmony_ci if (!peer_device) 212762306a36Sopenharmony_ci return -EIO; 212862306a36Sopenharmony_ci device = peer_device->device; 212962306a36Sopenharmony_ci 213062306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 213362306a36Sopenharmony_ci req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__); 213462306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 213562306a36Sopenharmony_ci if (unlikely(!req)) 213662306a36Sopenharmony_ci return -EIO; 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci err = recv_dless_read(peer_device, req, sector, pi->size); 213962306a36Sopenharmony_ci if (!err) 
214062306a36Sopenharmony_ci req_mod(req, DATA_RECEIVED, peer_device); 214162306a36Sopenharmony_ci /* else: nothing. handled from drbd_disconnect... 214262306a36Sopenharmony_ci * I don't think we may complete this just yet 214362306a36Sopenharmony_ci * in case we are "on-disconnect: freeze" */ 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci return err; 214662306a36Sopenharmony_ci} 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_cistatic int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi) 214962306a36Sopenharmony_ci{ 215062306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 215162306a36Sopenharmony_ci struct drbd_device *device; 215262306a36Sopenharmony_ci sector_t sector; 215362306a36Sopenharmony_ci int err; 215462306a36Sopenharmony_ci struct p_data *p = pi->data; 215562306a36Sopenharmony_ci 215662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 215762306a36Sopenharmony_ci if (!peer_device) 215862306a36Sopenharmony_ci return -EIO; 215962306a36Sopenharmony_ci device = peer_device->device; 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 216262306a36Sopenharmony_ci D_ASSERT(device, p->block_id == ID_SYNCER); 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci if (get_ldev(device)) { 216562306a36Sopenharmony_ci /* data is submitted to disk within recv_resync_read. 216662306a36Sopenharmony_ci * corresponding put_ldev done below on error, 216762306a36Sopenharmony_ci * or in drbd_peer_request_endio. 
*/ 216862306a36Sopenharmony_ci err = recv_resync_read(peer_device, sector, pi); 216962306a36Sopenharmony_ci } else { 217062306a36Sopenharmony_ci if (drbd_ratelimit()) 217162306a36Sopenharmony_ci drbd_err(device, "Can not write resync data to local disk.\n"); 217262306a36Sopenharmony_ci 217362306a36Sopenharmony_ci err = drbd_drain_block(peer_device, pi->size); 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 217662306a36Sopenharmony_ci } 217762306a36Sopenharmony_ci 217862306a36Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_in); 217962306a36Sopenharmony_ci 218062306a36Sopenharmony_ci return err; 218162306a36Sopenharmony_ci} 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_cistatic void restart_conflicting_writes(struct drbd_device *device, 218462306a36Sopenharmony_ci sector_t sector, int size) 218562306a36Sopenharmony_ci{ 218662306a36Sopenharmony_ci struct drbd_interval *i; 218762306a36Sopenharmony_ci struct drbd_request *req; 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 219062306a36Sopenharmony_ci if (!i->local) 219162306a36Sopenharmony_ci continue; 219262306a36Sopenharmony_ci req = container_of(i, struct drbd_request, i); 219362306a36Sopenharmony_ci if (req->rq_state & RQ_LOCAL_PENDING || 219462306a36Sopenharmony_ci !(req->rq_state & RQ_POSTPONED)) 219562306a36Sopenharmony_ci continue; 219662306a36Sopenharmony_ci /* as it is RQ_POSTPONED, this will cause it to 219762306a36Sopenharmony_ci * be queued on the retry workqueue. */ 219862306a36Sopenharmony_ci __req_mod(req, CONFLICT_RESOLVED, NULL, NULL); 219962306a36Sopenharmony_ci } 220062306a36Sopenharmony_ci} 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci/* 220362306a36Sopenharmony_ci * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs(). 
220462306a36Sopenharmony_ci */ 220562306a36Sopenharmony_cistatic int e_end_block(struct drbd_work *w, int cancel) 220662306a36Sopenharmony_ci{ 220762306a36Sopenharmony_ci struct drbd_peer_request *peer_req = 220862306a36Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 220962306a36Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 221062306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 221162306a36Sopenharmony_ci sector_t sector = peer_req->i.sector; 221262306a36Sopenharmony_ci int err = 0, pcmd; 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci if (peer_req->flags & EE_SEND_WRITE_ACK) { 221562306a36Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 221662306a36Sopenharmony_ci pcmd = (device->state.conn >= C_SYNC_SOURCE && 221762306a36Sopenharmony_ci device->state.conn <= C_PAUSED_SYNC_T && 221862306a36Sopenharmony_ci peer_req->flags & EE_MAY_SET_IN_SYNC) ? 221962306a36Sopenharmony_ci P_RS_WRITE_ACK : P_WRITE_ACK; 222062306a36Sopenharmony_ci err = drbd_send_ack(peer_device, pcmd, peer_req); 222162306a36Sopenharmony_ci if (pcmd == P_RS_WRITE_ACK) 222262306a36Sopenharmony_ci drbd_set_in_sync(peer_device, sector, peer_req->i.size); 222362306a36Sopenharmony_ci } else { 222462306a36Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 222562306a36Sopenharmony_ci /* we expect it to be marked out of sync anyways... 222662306a36Sopenharmony_ci * maybe assert this? */ 222762306a36Sopenharmony_ci } 222862306a36Sopenharmony_ci dec_unacked(device); 222962306a36Sopenharmony_ci } 223062306a36Sopenharmony_ci 223162306a36Sopenharmony_ci /* we delete from the conflict detection hash _after_ we sent out the 223262306a36Sopenharmony_ci * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
*/ 223362306a36Sopenharmony_ci if (peer_req->flags & EE_IN_INTERVAL_TREE) { 223462306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 223562306a36Sopenharmony_ci D_ASSERT(device, !drbd_interval_empty(&peer_req->i)); 223662306a36Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 223762306a36Sopenharmony_ci if (peer_req->flags & EE_RESTART_REQUESTS) 223862306a36Sopenharmony_ci restart_conflicting_writes(device, sector, peer_req->i.size); 223962306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 224062306a36Sopenharmony_ci } else 224162306a36Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci return err; 224662306a36Sopenharmony_ci} 224762306a36Sopenharmony_ci 224862306a36Sopenharmony_cistatic int e_send_ack(struct drbd_work *w, enum drbd_packet ack) 224962306a36Sopenharmony_ci{ 225062306a36Sopenharmony_ci struct drbd_peer_request *peer_req = 225162306a36Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 225262306a36Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 225362306a36Sopenharmony_ci int err; 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci err = drbd_send_ack(peer_device, ack, peer_req); 225662306a36Sopenharmony_ci dec_unacked(peer_device->device); 225762306a36Sopenharmony_ci 225862306a36Sopenharmony_ci return err; 225962306a36Sopenharmony_ci} 226062306a36Sopenharmony_ci 226162306a36Sopenharmony_cistatic int e_send_superseded(struct drbd_work *w, int unused) 226262306a36Sopenharmony_ci{ 226362306a36Sopenharmony_ci return e_send_ack(w, P_SUPERSEDED); 226462306a36Sopenharmony_ci} 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_cistatic int e_send_retry_write(struct drbd_work *w, int unused) 226762306a36Sopenharmony_ci{ 
226862306a36Sopenharmony_ci struct drbd_peer_request *peer_req = 226962306a36Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 227062306a36Sopenharmony_ci struct drbd_connection *connection = peer_req->peer_device->connection; 227162306a36Sopenharmony_ci 227262306a36Sopenharmony_ci return e_send_ack(w, connection->agreed_pro_version >= 100 ? 227362306a36Sopenharmony_ci P_RETRY_WRITE : P_SUPERSEDED); 227462306a36Sopenharmony_ci} 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_cistatic bool seq_greater(u32 a, u32 b) 227762306a36Sopenharmony_ci{ 227862306a36Sopenharmony_ci /* 227962306a36Sopenharmony_ci * We assume 32-bit wrap-around here. 228062306a36Sopenharmony_ci * For 24-bit wrap-around, we would have to shift: 228162306a36Sopenharmony_ci * a <<= 8; b <<= 8; 228262306a36Sopenharmony_ci */ 228362306a36Sopenharmony_ci return (s32)a - (s32)b > 0; 228462306a36Sopenharmony_ci} 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_cistatic u32 seq_max(u32 a, u32 b) 228762306a36Sopenharmony_ci{ 228862306a36Sopenharmony_ci return seq_greater(a, b) ? 
a : b; 228962306a36Sopenharmony_ci} 229062306a36Sopenharmony_ci 229162306a36Sopenharmony_cistatic void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq) 229262306a36Sopenharmony_ci{ 229362306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 229462306a36Sopenharmony_ci unsigned int newest_peer_seq; 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) { 229762306a36Sopenharmony_ci spin_lock(&device->peer_seq_lock); 229862306a36Sopenharmony_ci newest_peer_seq = seq_max(device->peer_seq, peer_seq); 229962306a36Sopenharmony_ci device->peer_seq = newest_peer_seq; 230062306a36Sopenharmony_ci spin_unlock(&device->peer_seq_lock); 230162306a36Sopenharmony_ci /* wake up only if we actually changed device->peer_seq */ 230262306a36Sopenharmony_ci if (peer_seq == newest_peer_seq) 230362306a36Sopenharmony_ci wake_up(&device->seq_wait); 230462306a36Sopenharmony_ci } 230562306a36Sopenharmony_ci} 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_cistatic inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) 230862306a36Sopenharmony_ci{ 230962306a36Sopenharmony_ci return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); 231062306a36Sopenharmony_ci} 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci/* maybe change sync_ee into interval trees as well? 
*/ 231362306a36Sopenharmony_cistatic bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) 231462306a36Sopenharmony_ci{ 231562306a36Sopenharmony_ci struct drbd_peer_request *rs_req; 231662306a36Sopenharmony_ci bool rv = false; 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 231962306a36Sopenharmony_ci list_for_each_entry(rs_req, &device->sync_ee, w.list) { 232062306a36Sopenharmony_ci if (overlaps(peer_req->i.sector, peer_req->i.size, 232162306a36Sopenharmony_ci rs_req->i.sector, rs_req->i.size)) { 232262306a36Sopenharmony_ci rv = true; 232362306a36Sopenharmony_ci break; 232462306a36Sopenharmony_ci } 232562306a36Sopenharmony_ci } 232662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci return rv; 232962306a36Sopenharmony_ci} 233062306a36Sopenharmony_ci 233162306a36Sopenharmony_ci/* Called from receive_Data. 233262306a36Sopenharmony_ci * Synchronize packets on sock with packets on msock. 233362306a36Sopenharmony_ci * 233462306a36Sopenharmony_ci * This is here so even when a P_DATA packet traveling via sock overtook an Ack 233562306a36Sopenharmony_ci * packet traveling on msock, they are still processed in the order they have 233662306a36Sopenharmony_ci * been sent. 233762306a36Sopenharmony_ci * 233862306a36Sopenharmony_ci * Note: we don't care for Ack packets overtaking P_DATA packets. 233962306a36Sopenharmony_ci * 234062306a36Sopenharmony_ci * In case packet_seq is larger than device->peer_seq number, there are 234162306a36Sopenharmony_ci * outstanding packets on the msock. We wait for them to arrive. 234262306a36Sopenharmony_ci * In case we are the logically next packet, we update device->peer_seq 234362306a36Sopenharmony_ci * ourselves. Correctly handles 32bit wrap around. 
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal),
 * -ETIMEDOUT if the missing packets did not show up within the timeout
 * derived from net_conf->ping_timeo.
 *
 * Also returns 0 right away if RESOLVE_CONFLICTS is not set on the
 * connection, and 0 without touching device->peer_seq if two_primaries
 * is not enabled. */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* peer_seq is at most one ahead of what we have seen so far:
		 * nothing is missing, record it and go on */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* the lock is not held while sleeping */
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		/* schedule_timeout() returned 0: the full timeout elapsed */
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}

/*
 * Map the DP_* wire flags to the bio operation: DP_ZEROES takes precedence
 * over DP_DISCARD; without either flag it is a plain write.
 */
static enum req_op wire_flags_to_bio_op(u32 dpf)
{
	if (dpf & DP_ZEROES)
		return REQ_OP_WRITE_ZEROES;
	if (dpf & DP_DISCARD)
		return REQ_OP_DISCARD;
	else
		return REQ_OP_WRITE;
}

/* see also bio_flags_to_wire() */
/*
 * Combine the operation from wire_flags_to_bio_op() with the request flags
 * for DP_RW_SYNC (REQ_SYNC), DP_FUA (REQ_FUA) and DP_FLUSH (REQ_PREFLUSH).
 * @connection is not used here.
 */
static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
{
	return wire_flags_to_bio_op(dpf) |
		(dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
		(dpf & DP_FUA ? REQ_FUA : 0) |
		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
}

/*
 * Clear RQ_POSTPONED on every local write request overlapping
 * [sector, sector + size) and signal NEG_ACKED to it, completing the master
 * bio if __req_mod() hands one back.
 *
 * The resource's req_lock is dropped and re-taken around
 * complete_master_bio(), so the caller is expected to hold it on entry --
 * NOTE(review): confirm against callers.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, peer_device, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		/* the lock was dropped in between: start the walk over */
		goto repeat;
	}
}

static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
245162306a36Sopenharmony_ci const unsigned int size = peer_req->i.size; 245262306a36Sopenharmony_ci struct drbd_interval *i; 245362306a36Sopenharmony_ci bool equal; 245462306a36Sopenharmony_ci int err; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci /* 245762306a36Sopenharmony_ci * Inserting the peer request into the write_requests tree will prevent 245862306a36Sopenharmony_ci * new conflicting local requests from being added. 245962306a36Sopenharmony_ci */ 246062306a36Sopenharmony_ci drbd_insert_interval(&device->write_requests, &peer_req->i); 246162306a36Sopenharmony_ci 246262306a36Sopenharmony_ci repeat: 246362306a36Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 246462306a36Sopenharmony_ci if (i == &peer_req->i) 246562306a36Sopenharmony_ci continue; 246662306a36Sopenharmony_ci if (i->completed) 246762306a36Sopenharmony_ci continue; 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci if (!i->local) { 247062306a36Sopenharmony_ci /* 247162306a36Sopenharmony_ci * Our peer has sent a conflicting remote request; this 247262306a36Sopenharmony_ci * should not happen in a two-node setup. Wait for the 247362306a36Sopenharmony_ci * earlier peer request to complete. 247462306a36Sopenharmony_ci */ 247562306a36Sopenharmony_ci err = drbd_wait_misc(device, i); 247662306a36Sopenharmony_ci if (err) 247762306a36Sopenharmony_ci goto out; 247862306a36Sopenharmony_ci goto repeat; 247962306a36Sopenharmony_ci } 248062306a36Sopenharmony_ci 248162306a36Sopenharmony_ci equal = i->sector == sector && i->size == size; 248262306a36Sopenharmony_ci if (resolve_conflicts) { 248362306a36Sopenharmony_ci /* 248462306a36Sopenharmony_ci * If the peer request is fully contained within the 248562306a36Sopenharmony_ci * overlapping request, it can be considered overwritten 248662306a36Sopenharmony_ci * and thus superseded; otherwise, it will be retried 248762306a36Sopenharmony_ci * once all overlapping requests have completed. 
248862306a36Sopenharmony_ci */ 248962306a36Sopenharmony_ci bool superseded = i->sector <= sector && i->sector + 249062306a36Sopenharmony_ci (i->size >> 9) >= sector + (size >> 9); 249162306a36Sopenharmony_ci 249262306a36Sopenharmony_ci if (!equal) 249362306a36Sopenharmony_ci drbd_alert(device, "Concurrent writes detected: " 249462306a36Sopenharmony_ci "local=%llus +%u, remote=%llus +%u, " 249562306a36Sopenharmony_ci "assuming %s came first\n", 249662306a36Sopenharmony_ci (unsigned long long)i->sector, i->size, 249762306a36Sopenharmony_ci (unsigned long long)sector, size, 249862306a36Sopenharmony_ci superseded ? "local" : "remote"); 249962306a36Sopenharmony_ci 250062306a36Sopenharmony_ci peer_req->w.cb = superseded ? e_send_superseded : 250162306a36Sopenharmony_ci e_send_retry_write; 250262306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->done_ee); 250362306a36Sopenharmony_ci queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work); 250462306a36Sopenharmony_ci 250562306a36Sopenharmony_ci err = -ENOENT; 250662306a36Sopenharmony_ci goto out; 250762306a36Sopenharmony_ci } else { 250862306a36Sopenharmony_ci struct drbd_request *req = 250962306a36Sopenharmony_ci container_of(i, struct drbd_request, i); 251062306a36Sopenharmony_ci 251162306a36Sopenharmony_ci if (!equal) 251262306a36Sopenharmony_ci drbd_alert(device, "Concurrent writes detected: " 251362306a36Sopenharmony_ci "local=%llus +%u, remote=%llus +%u\n", 251462306a36Sopenharmony_ci (unsigned long long)i->sector, i->size, 251562306a36Sopenharmony_ci (unsigned long long)sector, size); 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci if (req->rq_state & RQ_LOCAL_PENDING || 251862306a36Sopenharmony_ci !(req->rq_state & RQ_POSTPONED)) { 251962306a36Sopenharmony_ci /* 252062306a36Sopenharmony_ci * Wait for the node with the discard flag to 252162306a36Sopenharmony_ci * decide if this request has been superseded 252262306a36Sopenharmony_ci * or needs to be retried. 
252362306a36Sopenharmony_ci * Requests that have been superseded will 252462306a36Sopenharmony_ci * disappear from the write_requests tree. 252562306a36Sopenharmony_ci * 252662306a36Sopenharmony_ci * In addition, wait for the conflicting 252762306a36Sopenharmony_ci * request to finish locally before submitting 252862306a36Sopenharmony_ci * the conflicting peer request. 252962306a36Sopenharmony_ci */ 253062306a36Sopenharmony_ci err = drbd_wait_misc(device, &req->i); 253162306a36Sopenharmony_ci if (err) { 253262306a36Sopenharmony_ci _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 253362306a36Sopenharmony_ci fail_postponed_requests(device, sector, size); 253462306a36Sopenharmony_ci goto out; 253562306a36Sopenharmony_ci } 253662306a36Sopenharmony_ci goto repeat; 253762306a36Sopenharmony_ci } 253862306a36Sopenharmony_ci /* 253962306a36Sopenharmony_ci * Remember to restart the conflicting requests after 254062306a36Sopenharmony_ci * the new peer request has completed. 254162306a36Sopenharmony_ci */ 254262306a36Sopenharmony_ci peer_req->flags |= EE_RESTART_REQUESTS; 254362306a36Sopenharmony_ci } 254462306a36Sopenharmony_ci } 254562306a36Sopenharmony_ci err = 0; 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_ci out: 254862306a36Sopenharmony_ci if (err) 254962306a36Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 255062306a36Sopenharmony_ci return err; 255162306a36Sopenharmony_ci} 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_ci/* mirrored write */ 255462306a36Sopenharmony_cistatic int receive_Data(struct drbd_connection *connection, struct packet_info *pi) 255562306a36Sopenharmony_ci{ 255662306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 255762306a36Sopenharmony_ci struct drbd_device *device; 255862306a36Sopenharmony_ci struct net_conf *nc; 255962306a36Sopenharmony_ci sector_t sector; 256062306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 256162306a36Sopenharmony_ci struct p_data *p = pi->data; 
256262306a36Sopenharmony_ci u32 peer_seq = be32_to_cpu(p->seq_num); 256362306a36Sopenharmony_ci u32 dp_flags; 256462306a36Sopenharmony_ci int err, tp; 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 256762306a36Sopenharmony_ci if (!peer_device) 256862306a36Sopenharmony_ci return -EIO; 256962306a36Sopenharmony_ci device = peer_device->device; 257062306a36Sopenharmony_ci 257162306a36Sopenharmony_ci if (!get_ldev(device)) { 257262306a36Sopenharmony_ci int err2; 257362306a36Sopenharmony_ci 257462306a36Sopenharmony_ci err = wait_for_and_update_peer_seq(peer_device, peer_seq); 257562306a36Sopenharmony_ci drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 257662306a36Sopenharmony_ci atomic_inc(&connection->current_epoch->epoch_size); 257762306a36Sopenharmony_ci err2 = drbd_drain_block(peer_device, pi->size); 257862306a36Sopenharmony_ci if (!err) 257962306a36Sopenharmony_ci err = err2; 258062306a36Sopenharmony_ci return err; 258162306a36Sopenharmony_ci } 258262306a36Sopenharmony_ci 258362306a36Sopenharmony_ci /* 258462306a36Sopenharmony_ci * Corresponding put_ldev done either below (on various errors), or in 258562306a36Sopenharmony_ci * drbd_peer_request_endio, if we successfully submit the data at the 258662306a36Sopenharmony_ci * end of this function. 
258762306a36Sopenharmony_ci */ 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 259062306a36Sopenharmony_ci peer_req = read_in_block(peer_device, p->block_id, sector, pi); 259162306a36Sopenharmony_ci if (!peer_req) { 259262306a36Sopenharmony_ci put_ldev(device); 259362306a36Sopenharmony_ci return -EIO; 259462306a36Sopenharmony_ci } 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci peer_req->w.cb = e_end_block; 259762306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 259862306a36Sopenharmony_ci peer_req->flags |= EE_APPLICATION; 259962306a36Sopenharmony_ci 260062306a36Sopenharmony_ci dp_flags = be32_to_cpu(p->dp_flags); 260162306a36Sopenharmony_ci peer_req->opf = wire_flags_to_bio(connection, dp_flags); 260262306a36Sopenharmony_ci if (pi->cmd == P_TRIM) { 260362306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req->i.size > 0); 260462306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD); 260562306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req->pages == NULL); 260662306a36Sopenharmony_ci /* need to play safe: an older DRBD sender 260762306a36Sopenharmony_ci * may mean zero-out while sending P_TRIM. */ 260862306a36Sopenharmony_ci if (0 == (connection->agreed_features & DRBD_FF_WZEROES)) 260962306a36Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 261062306a36Sopenharmony_ci } else if (pi->cmd == P_ZEROES) { 261162306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req->i.size > 0); 261262306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES); 261362306a36Sopenharmony_ci D_ASSERT(peer_device, peer_req->pages == NULL); 261462306a36Sopenharmony_ci /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? 
*/ 261562306a36Sopenharmony_ci if (dp_flags & DP_DISCARD) 261662306a36Sopenharmony_ci peer_req->flags |= EE_TRIM; 261762306a36Sopenharmony_ci } else if (peer_req->pages == NULL) { 261862306a36Sopenharmony_ci D_ASSERT(device, peer_req->i.size == 0); 261962306a36Sopenharmony_ci D_ASSERT(device, dp_flags & DP_FLUSH); 262062306a36Sopenharmony_ci } 262162306a36Sopenharmony_ci 262262306a36Sopenharmony_ci if (dp_flags & DP_MAY_SET_IN_SYNC) 262362306a36Sopenharmony_ci peer_req->flags |= EE_MAY_SET_IN_SYNC; 262462306a36Sopenharmony_ci 262562306a36Sopenharmony_ci spin_lock(&connection->epoch_lock); 262662306a36Sopenharmony_ci peer_req->epoch = connection->current_epoch; 262762306a36Sopenharmony_ci atomic_inc(&peer_req->epoch->epoch_size); 262862306a36Sopenharmony_ci atomic_inc(&peer_req->epoch->active); 262962306a36Sopenharmony_ci spin_unlock(&connection->epoch_lock); 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_ci rcu_read_lock(); 263262306a36Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 263362306a36Sopenharmony_ci tp = nc->two_primaries; 263462306a36Sopenharmony_ci if (peer_device->connection->agreed_pro_version < 100) { 263562306a36Sopenharmony_ci switch (nc->wire_protocol) { 263662306a36Sopenharmony_ci case DRBD_PROT_C: 263762306a36Sopenharmony_ci dp_flags |= DP_SEND_WRITE_ACK; 263862306a36Sopenharmony_ci break; 263962306a36Sopenharmony_ci case DRBD_PROT_B: 264062306a36Sopenharmony_ci dp_flags |= DP_SEND_RECEIVE_ACK; 264162306a36Sopenharmony_ci break; 264262306a36Sopenharmony_ci } 264362306a36Sopenharmony_ci } 264462306a36Sopenharmony_ci rcu_read_unlock(); 264562306a36Sopenharmony_ci 264662306a36Sopenharmony_ci if (dp_flags & DP_SEND_WRITE_ACK) { 264762306a36Sopenharmony_ci peer_req->flags |= EE_SEND_WRITE_ACK; 264862306a36Sopenharmony_ci inc_unacked(device); 264962306a36Sopenharmony_ci /* corresponding dec_unacked() in e_end_block() 265062306a36Sopenharmony_ci * respective _drbd_clear_done_ee */ 265162306a36Sopenharmony_ci } 
265262306a36Sopenharmony_ci 265362306a36Sopenharmony_ci if (dp_flags & DP_SEND_RECEIVE_ACK) { 265462306a36Sopenharmony_ci /* I really don't like it that the receiver thread 265562306a36Sopenharmony_ci * sends on the msock, but anyways */ 265662306a36Sopenharmony_ci drbd_send_ack(peer_device, P_RECV_ACK, peer_req); 265762306a36Sopenharmony_ci } 265862306a36Sopenharmony_ci 265962306a36Sopenharmony_ci if (tp) { 266062306a36Sopenharmony_ci /* two primaries implies protocol C */ 266162306a36Sopenharmony_ci D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); 266262306a36Sopenharmony_ci peer_req->flags |= EE_IN_INTERVAL_TREE; 266362306a36Sopenharmony_ci err = wait_for_and_update_peer_seq(peer_device, peer_seq); 266462306a36Sopenharmony_ci if (err) 266562306a36Sopenharmony_ci goto out_interrupted; 266662306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 266762306a36Sopenharmony_ci err = handle_write_conflicts(device, peer_req); 266862306a36Sopenharmony_ci if (err) { 266962306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 267062306a36Sopenharmony_ci if (err == -ENOENT) { 267162306a36Sopenharmony_ci put_ldev(device); 267262306a36Sopenharmony_ci return 0; 267362306a36Sopenharmony_ci } 267462306a36Sopenharmony_ci goto out_interrupted; 267562306a36Sopenharmony_ci } 267662306a36Sopenharmony_ci } else { 267762306a36Sopenharmony_ci update_peer_seq(peer_device, peer_seq); 267862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 267962306a36Sopenharmony_ci } 268062306a36Sopenharmony_ci /* TRIM and is processed synchronously, 268162306a36Sopenharmony_ci * we wait for all pending requests, respectively wait for 268262306a36Sopenharmony_ci * active_ee to become empty in drbd_submit_peer_request(); 268362306a36Sopenharmony_ci * better not add ourselves here. 
*/ 268462306a36Sopenharmony_ci if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0) 268562306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->active_ee); 268662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci if (device->state.conn == C_SYNC_TARGET) 268962306a36Sopenharmony_ci wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 269062306a36Sopenharmony_ci 269162306a36Sopenharmony_ci if (device->state.pdsk < D_INCONSISTENT) { 269262306a36Sopenharmony_ci /* In case we have the only disk of the cluster, */ 269362306a36Sopenharmony_ci drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); 269462306a36Sopenharmony_ci peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 269562306a36Sopenharmony_ci drbd_al_begin_io(device, &peer_req->i); 269662306a36Sopenharmony_ci peer_req->flags |= EE_CALL_AL_COMPLETE_IO; 269762306a36Sopenharmony_ci } 269862306a36Sopenharmony_ci 269962306a36Sopenharmony_ci err = drbd_submit_peer_request(peer_req); 270062306a36Sopenharmony_ci if (!err) 270162306a36Sopenharmony_ci return 0; 270262306a36Sopenharmony_ci 270362306a36Sopenharmony_ci /* don't care for the reason here */ 270462306a36Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 270562306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 270662306a36Sopenharmony_ci list_del(&peer_req->w.list); 270762306a36Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 270862306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 270962306a36Sopenharmony_ci if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { 271062306a36Sopenharmony_ci peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 271162306a36Sopenharmony_ci drbd_al_complete_io(device, &peer_req->i); 271262306a36Sopenharmony_ci } 271362306a36Sopenharmony_ci 271462306a36Sopenharmony_ciout_interrupted: 271562306a36Sopenharmony_ci drbd_may_finish_epoch(connection, peer_req->epoch, 
EV_PUT | EV_CLEANUP); 271662306a36Sopenharmony_ci put_ldev(device); 271762306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 271862306a36Sopenharmony_ci return err; 271962306a36Sopenharmony_ci} 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci/* We may throttle resync, if the lower device seems to be busy, 272262306a36Sopenharmony_ci * and current sync rate is above c_min_rate. 272362306a36Sopenharmony_ci * 272462306a36Sopenharmony_ci * To decide whether or not the lower device is busy, we use a scheme similar 272562306a36Sopenharmony_ci * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" 272662306a36Sopenharmony_ci * (more than 64 sectors) of activity we cannot account for with our own resync 272762306a36Sopenharmony_ci * activity, it obviously is "busy". 272862306a36Sopenharmony_ci * 272962306a36Sopenharmony_ci * The current sync rate used here uses only the most recent two step marks, 273062306a36Sopenharmony_ci * to have a short time average so we can react faster. 
273162306a36Sopenharmony_ci */ 273262306a36Sopenharmony_cibool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, 273362306a36Sopenharmony_ci bool throttle_if_app_is_waiting) 273462306a36Sopenharmony_ci{ 273562306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 273662306a36Sopenharmony_ci struct lc_element *tmp; 273762306a36Sopenharmony_ci bool throttle = drbd_rs_c_min_rate_throttle(device); 273862306a36Sopenharmony_ci 273962306a36Sopenharmony_ci if (!throttle || throttle_if_app_is_waiting) 274062306a36Sopenharmony_ci return throttle; 274162306a36Sopenharmony_ci 274262306a36Sopenharmony_ci spin_lock_irq(&device->al_lock); 274362306a36Sopenharmony_ci tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); 274462306a36Sopenharmony_ci if (tmp) { 274562306a36Sopenharmony_ci struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 274662306a36Sopenharmony_ci if (test_bit(BME_PRIORITY, &bm_ext->flags)) 274762306a36Sopenharmony_ci throttle = false; 274862306a36Sopenharmony_ci /* Do not slow down if app IO is already waiting for this extent, 274962306a36Sopenharmony_ci * and our progress is necessary for application IO to complete. 
*/ 275062306a36Sopenharmony_ci } 275162306a36Sopenharmony_ci spin_unlock_irq(&device->al_lock); 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_ci return throttle; 275462306a36Sopenharmony_ci} 275562306a36Sopenharmony_ci 275662306a36Sopenharmony_cibool drbd_rs_c_min_rate_throttle(struct drbd_device *device) 275762306a36Sopenharmony_ci{ 275862306a36Sopenharmony_ci struct gendisk *disk = device->ldev->backing_bdev->bd_disk; 275962306a36Sopenharmony_ci unsigned long db, dt, dbdt; 276062306a36Sopenharmony_ci unsigned int c_min_rate; 276162306a36Sopenharmony_ci int curr_events; 276262306a36Sopenharmony_ci 276362306a36Sopenharmony_ci rcu_read_lock(); 276462306a36Sopenharmony_ci c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; 276562306a36Sopenharmony_ci rcu_read_unlock(); 276662306a36Sopenharmony_ci 276762306a36Sopenharmony_ci /* feature disabled? */ 276862306a36Sopenharmony_ci if (c_min_rate == 0) 276962306a36Sopenharmony_ci return false; 277062306a36Sopenharmony_ci 277162306a36Sopenharmony_ci curr_events = (int)part_stat_read_accum(disk->part0, sectors) - 277262306a36Sopenharmony_ci atomic_read(&device->rs_sect_ev); 277362306a36Sopenharmony_ci 277462306a36Sopenharmony_ci if (atomic_read(&device->ap_actlog_cnt) 277562306a36Sopenharmony_ci || curr_events - device->rs_last_events > 64) { 277662306a36Sopenharmony_ci unsigned long rs_left; 277762306a36Sopenharmony_ci int i; 277862306a36Sopenharmony_ci 277962306a36Sopenharmony_ci device->rs_last_events = curr_events; 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, 278262306a36Sopenharmony_ci * approx. 
*/ 278362306a36Sopenharmony_ci i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; 278462306a36Sopenharmony_ci 278562306a36Sopenharmony_ci if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 278662306a36Sopenharmony_ci rs_left = device->ov_left; 278762306a36Sopenharmony_ci else 278862306a36Sopenharmony_ci rs_left = drbd_bm_total_weight(device) - device->rs_failed; 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ; 279162306a36Sopenharmony_ci if (!dt) 279262306a36Sopenharmony_ci dt++; 279362306a36Sopenharmony_ci db = device->rs_mark_left[i] - rs_left; 279462306a36Sopenharmony_ci dbdt = Bit2KB(db/dt); 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci if (dbdt > c_min_rate) 279762306a36Sopenharmony_ci return true; 279862306a36Sopenharmony_ci } 279962306a36Sopenharmony_ci return false; 280062306a36Sopenharmony_ci} 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_cistatic int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) 280362306a36Sopenharmony_ci{ 280462306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 280562306a36Sopenharmony_ci struct drbd_device *device; 280662306a36Sopenharmony_ci sector_t sector; 280762306a36Sopenharmony_ci sector_t capacity; 280862306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 280962306a36Sopenharmony_ci struct digest_info *di = NULL; 281062306a36Sopenharmony_ci int size, verb; 281162306a36Sopenharmony_ci struct p_block_req *p = pi->data; 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 281462306a36Sopenharmony_ci if (!peer_device) 281562306a36Sopenharmony_ci return -EIO; 281662306a36Sopenharmony_ci device = peer_device->device; 281762306a36Sopenharmony_ci capacity = get_capacity(device->vdisk); 281862306a36Sopenharmony_ci 281962306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 282062306a36Sopenharmony_ci size = 
be32_to_cpu(p->blksize); 282162306a36Sopenharmony_ci 282262306a36Sopenharmony_ci if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { 282362306a36Sopenharmony_ci drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 282462306a36Sopenharmony_ci (unsigned long long)sector, size); 282562306a36Sopenharmony_ci return -EINVAL; 282662306a36Sopenharmony_ci } 282762306a36Sopenharmony_ci if (sector + (size>>9) > capacity) { 282862306a36Sopenharmony_ci drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 282962306a36Sopenharmony_ci (unsigned long long)sector, size); 283062306a36Sopenharmony_ci return -EINVAL; 283162306a36Sopenharmony_ci } 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci if (!get_ldev_if_state(device, D_UP_TO_DATE)) { 283462306a36Sopenharmony_ci verb = 1; 283562306a36Sopenharmony_ci switch (pi->cmd) { 283662306a36Sopenharmony_ci case P_DATA_REQUEST: 283762306a36Sopenharmony_ci drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); 283862306a36Sopenharmony_ci break; 283962306a36Sopenharmony_ci case P_RS_THIN_REQ: 284062306a36Sopenharmony_ci case P_RS_DATA_REQUEST: 284162306a36Sopenharmony_ci case P_CSUM_RS_REQUEST: 284262306a36Sopenharmony_ci case P_OV_REQUEST: 284362306a36Sopenharmony_ci drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p); 284462306a36Sopenharmony_ci break; 284562306a36Sopenharmony_ci case P_OV_REPLY: 284662306a36Sopenharmony_ci verb = 0; 284762306a36Sopenharmony_ci dec_rs_pending(peer_device); 284862306a36Sopenharmony_ci drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); 284962306a36Sopenharmony_ci break; 285062306a36Sopenharmony_ci default: 285162306a36Sopenharmony_ci BUG(); 285262306a36Sopenharmony_ci } 285362306a36Sopenharmony_ci if (verb && drbd_ratelimit()) 285462306a36Sopenharmony_ci drbd_err(device, "Can not satisfy peer's read request, " 285562306a36Sopenharmony_ci "no local data.\n"); 285662306a36Sopenharmony_ci 285762306a36Sopenharmony_ci /* drain possibly 
payload */ 285862306a36Sopenharmony_ci return drbd_drain_block(peer_device, pi->size); 285962306a36Sopenharmony_ci } 286062306a36Sopenharmony_ci 286162306a36Sopenharmony_ci /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 286262306a36Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 286362306a36Sopenharmony_ci * which in turn might block on the other node at this very place. */ 286462306a36Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, 286562306a36Sopenharmony_ci size, GFP_NOIO); 286662306a36Sopenharmony_ci if (!peer_req) { 286762306a36Sopenharmony_ci put_ldev(device); 286862306a36Sopenharmony_ci return -ENOMEM; 286962306a36Sopenharmony_ci } 287062306a36Sopenharmony_ci peer_req->opf = REQ_OP_READ; 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_ci switch (pi->cmd) { 287362306a36Sopenharmony_ci case P_DATA_REQUEST: 287462306a36Sopenharmony_ci peer_req->w.cb = w_e_end_data_req; 287562306a36Sopenharmony_ci /* application IO, don't drbd_rs_begin_io */ 287662306a36Sopenharmony_ci peer_req->flags |= EE_APPLICATION; 287762306a36Sopenharmony_ci goto submit; 287862306a36Sopenharmony_ci 287962306a36Sopenharmony_ci case P_RS_THIN_REQ: 288062306a36Sopenharmony_ci /* If at some point in the future we have a smart way to 288162306a36Sopenharmony_ci find out if this data block is completely deallocated, 288262306a36Sopenharmony_ci then we would do something smarter here than reading 288362306a36Sopenharmony_ci the block... 
*/ 288462306a36Sopenharmony_ci peer_req->flags |= EE_RS_THIN_REQ; 288562306a36Sopenharmony_ci fallthrough; 288662306a36Sopenharmony_ci case P_RS_DATA_REQUEST: 288762306a36Sopenharmony_ci peer_req->w.cb = w_e_end_rsdata_req; 288862306a36Sopenharmony_ci /* used in the sector offset progress display */ 288962306a36Sopenharmony_ci device->bm_resync_fo = BM_SECT_TO_BIT(sector); 289062306a36Sopenharmony_ci break; 289162306a36Sopenharmony_ci 289262306a36Sopenharmony_ci case P_OV_REPLY: 289362306a36Sopenharmony_ci case P_CSUM_RS_REQUEST: 289462306a36Sopenharmony_ci di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); 289562306a36Sopenharmony_ci if (!di) 289662306a36Sopenharmony_ci goto out_free_e; 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_ci di->digest_size = pi->size; 289962306a36Sopenharmony_ci di->digest = (((char *)di)+sizeof(struct digest_info)); 290062306a36Sopenharmony_ci 290162306a36Sopenharmony_ci peer_req->digest = di; 290262306a36Sopenharmony_ci peer_req->flags |= EE_HAS_DIGEST; 290362306a36Sopenharmony_ci 290462306a36Sopenharmony_ci if (drbd_recv_all(peer_device->connection, di->digest, pi->size)) 290562306a36Sopenharmony_ci goto out_free_e; 290662306a36Sopenharmony_ci 290762306a36Sopenharmony_ci if (pi->cmd == P_CSUM_RS_REQUEST) { 290862306a36Sopenharmony_ci D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 290962306a36Sopenharmony_ci peer_req->w.cb = w_e_end_csum_rs_req; 291062306a36Sopenharmony_ci /* used in the sector offset progress display */ 291162306a36Sopenharmony_ci device->bm_resync_fo = BM_SECT_TO_BIT(sector); 291262306a36Sopenharmony_ci /* remember to report stats in drbd_resync_finished */ 291362306a36Sopenharmony_ci device->use_csums = true; 291462306a36Sopenharmony_ci } else if (pi->cmd == P_OV_REPLY) { 291562306a36Sopenharmony_ci /* track progress, we may need to throttle */ 291662306a36Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_in); 291762306a36Sopenharmony_ci peer_req->w.cb = w_e_end_ov_reply; 
291862306a36Sopenharmony_ci dec_rs_pending(peer_device); 291962306a36Sopenharmony_ci /* drbd_rs_begin_io done when we sent this request, 292062306a36Sopenharmony_ci * but accounting still needs to be done. */ 292162306a36Sopenharmony_ci goto submit_for_resync; 292262306a36Sopenharmony_ci } 292362306a36Sopenharmony_ci break; 292462306a36Sopenharmony_ci 292562306a36Sopenharmony_ci case P_OV_REQUEST: 292662306a36Sopenharmony_ci if (device->ov_start_sector == ~(sector_t)0 && 292762306a36Sopenharmony_ci peer_device->connection->agreed_pro_version >= 90) { 292862306a36Sopenharmony_ci unsigned long now = jiffies; 292962306a36Sopenharmony_ci int i; 293062306a36Sopenharmony_ci device->ov_start_sector = sector; 293162306a36Sopenharmony_ci device->ov_position = sector; 293262306a36Sopenharmony_ci device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector); 293362306a36Sopenharmony_ci device->rs_total = device->ov_left; 293462306a36Sopenharmony_ci for (i = 0; i < DRBD_SYNC_MARKS; i++) { 293562306a36Sopenharmony_ci device->rs_mark_left[i] = device->ov_left; 293662306a36Sopenharmony_ci device->rs_mark_time[i] = now; 293762306a36Sopenharmony_ci } 293862306a36Sopenharmony_ci drbd_info(device, "Online Verify start sector: %llu\n", 293962306a36Sopenharmony_ci (unsigned long long)sector); 294062306a36Sopenharmony_ci } 294162306a36Sopenharmony_ci peer_req->w.cb = w_e_end_ov_req; 294262306a36Sopenharmony_ci break; 294362306a36Sopenharmony_ci 294462306a36Sopenharmony_ci default: 294562306a36Sopenharmony_ci BUG(); 294662306a36Sopenharmony_ci } 294762306a36Sopenharmony_ci 294862306a36Sopenharmony_ci /* Throttle, drbd_rs_begin_io and submit should become asynchronous 294962306a36Sopenharmony_ci * wrt the receiver, but it is not as straightforward as it may seem. 
295062306a36Sopenharmony_ci * Various places in the resync start and stop logic assume resync 295162306a36Sopenharmony_ci * requests are processed in order, requeuing this on the worker thread 295262306a36Sopenharmony_ci * introduces a bunch of new code for synchronization between threads. 295362306a36Sopenharmony_ci * 295462306a36Sopenharmony_ci * Unlimited throttling before drbd_rs_begin_io may stall the resync 295562306a36Sopenharmony_ci * "forever", throttling after drbd_rs_begin_io will lock that extent 295662306a36Sopenharmony_ci * for application writes for the same time. For now, just throttle 295762306a36Sopenharmony_ci * here, where the rest of the code expects the receiver to sleep for 295862306a36Sopenharmony_ci * a while, anyways. 295962306a36Sopenharmony_ci */ 296062306a36Sopenharmony_ci 296162306a36Sopenharmony_ci /* Throttle before drbd_rs_begin_io, as that locks out application IO; 296262306a36Sopenharmony_ci * this defers syncer requests for some time, before letting at least 296362306a36Sopenharmony_ci * on request through. The resync controller on the receiving side 296462306a36Sopenharmony_ci * will adapt to the incoming rate accordingly. 296562306a36Sopenharmony_ci * 296662306a36Sopenharmony_ci * We cannot throttle here if remote is Primary/SyncTarget: 296762306a36Sopenharmony_ci * we would also throttle its application reads. 296862306a36Sopenharmony_ci * In that case, throttling is done on the SyncTarget only. 296962306a36Sopenharmony_ci */ 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_ci /* Even though this may be a resync request, we do add to "read_ee"; 297262306a36Sopenharmony_ci * "sync_ee" is only used for resync WRITEs. 297362306a36Sopenharmony_ci * Add to list early, so debugfs can find this request 297462306a36Sopenharmony_ci * even if we have to sleep below. 
*/ 297562306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 297662306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->read_ee); 297762306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 297862306a36Sopenharmony_ci 297962306a36Sopenharmony_ci update_receiver_timing_details(connection, drbd_rs_should_slow_down); 298062306a36Sopenharmony_ci if (device->state.peer != R_PRIMARY 298162306a36Sopenharmony_ci && drbd_rs_should_slow_down(peer_device, sector, false)) 298262306a36Sopenharmony_ci schedule_timeout_uninterruptible(HZ/10); 298362306a36Sopenharmony_ci update_receiver_timing_details(connection, drbd_rs_begin_io); 298462306a36Sopenharmony_ci if (drbd_rs_begin_io(device, sector)) 298562306a36Sopenharmony_ci goto out_free_e; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_cisubmit_for_resync: 298862306a36Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_ev); 298962306a36Sopenharmony_ci 299062306a36Sopenharmony_cisubmit: 299162306a36Sopenharmony_ci update_receiver_timing_details(connection, drbd_submit_peer_request); 299262306a36Sopenharmony_ci inc_unacked(device); 299362306a36Sopenharmony_ci if (drbd_submit_peer_request(peer_req) == 0) 299462306a36Sopenharmony_ci return 0; 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_ci /* don't care for the reason here */ 299762306a36Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 299862306a36Sopenharmony_ci 299962306a36Sopenharmony_ciout_free_e: 300062306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 300162306a36Sopenharmony_ci list_del(&peer_req->w.list); 300262306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 300362306a36Sopenharmony_ci /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 300462306a36Sopenharmony_ci 300562306a36Sopenharmony_ci put_ldev(device); 300662306a36Sopenharmony_ci drbd_free_peer_req(device, peer_req); 300762306a36Sopenharmony_ci return -EIO; 
300862306a36Sopenharmony_ci} 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci/* 301162306a36Sopenharmony_ci * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries 301262306a36Sopenharmony_ci */ 301362306a36Sopenharmony_cistatic int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local) 301462306a36Sopenharmony_ci{ 301562306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 301662306a36Sopenharmony_ci int self, peer, rv = -100; 301762306a36Sopenharmony_ci unsigned long ch_self, ch_peer; 301862306a36Sopenharmony_ci enum drbd_after_sb_p after_sb_0p; 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_ci self = device->ldev->md.uuid[UI_BITMAP] & 1; 302162306a36Sopenharmony_ci peer = device->p_uuid[UI_BITMAP] & 1; 302262306a36Sopenharmony_ci 302362306a36Sopenharmony_ci ch_peer = device->p_uuid[UI_SIZE]; 302462306a36Sopenharmony_ci ch_self = device->comm_bm_set; 302562306a36Sopenharmony_ci 302662306a36Sopenharmony_ci rcu_read_lock(); 302762306a36Sopenharmony_ci after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p; 302862306a36Sopenharmony_ci rcu_read_unlock(); 302962306a36Sopenharmony_ci switch (after_sb_0p) { 303062306a36Sopenharmony_ci case ASB_CONSENSUS: 303162306a36Sopenharmony_ci case ASB_DISCARD_SECONDARY: 303262306a36Sopenharmony_ci case ASB_CALL_HELPER: 303362306a36Sopenharmony_ci case ASB_VIOLENTLY: 303462306a36Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 303562306a36Sopenharmony_ci break; 303662306a36Sopenharmony_ci case ASB_DISCONNECT: 303762306a36Sopenharmony_ci break; 303862306a36Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 303962306a36Sopenharmony_ci if (self == 0 && peer == 1) { 304062306a36Sopenharmony_ci rv = -1; 304162306a36Sopenharmony_ci break; 304262306a36Sopenharmony_ci } 304362306a36Sopenharmony_ci if (self == 1 && peer == 0) { 304462306a36Sopenharmony_ci rv = 1; 304562306a36Sopenharmony_ci break; 304662306a36Sopenharmony_ci } 
304762306a36Sopenharmony_ci fallthrough; /* to one of the other strategies */ 304862306a36Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 304962306a36Sopenharmony_ci if (self == 0 && peer == 1) { 305062306a36Sopenharmony_ci rv = 1; 305162306a36Sopenharmony_ci break; 305262306a36Sopenharmony_ci } 305362306a36Sopenharmony_ci if (self == 1 && peer == 0) { 305462306a36Sopenharmony_ci rv = -1; 305562306a36Sopenharmony_ci break; 305662306a36Sopenharmony_ci } 305762306a36Sopenharmony_ci /* Else fall through to one of the other strategies... */ 305862306a36Sopenharmony_ci drbd_warn(device, "Discard younger/older primary did not find a decision\n" 305962306a36Sopenharmony_ci "Using discard-least-changes instead\n"); 306062306a36Sopenharmony_ci fallthrough; 306162306a36Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 306262306a36Sopenharmony_ci if (ch_peer == 0 && ch_self == 0) { 306362306a36Sopenharmony_ci rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 306462306a36Sopenharmony_ci ? -1 : 1; 306562306a36Sopenharmony_ci break; 306662306a36Sopenharmony_ci } else { 306762306a36Sopenharmony_ci if (ch_peer == 0) { rv = 1; break; } 306862306a36Sopenharmony_ci if (ch_self == 0) { rv = -1; break; } 306962306a36Sopenharmony_ci } 307062306a36Sopenharmony_ci if (after_sb_0p == ASB_DISCARD_ZERO_CHG) 307162306a36Sopenharmony_ci break; 307262306a36Sopenharmony_ci fallthrough; 307362306a36Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 307462306a36Sopenharmony_ci if (ch_self < ch_peer) 307562306a36Sopenharmony_ci rv = -1; 307662306a36Sopenharmony_ci else if (ch_self > ch_peer) 307762306a36Sopenharmony_ci rv = 1; 307862306a36Sopenharmony_ci else /* ( ch_self == ch_peer ) */ 307962306a36Sopenharmony_ci /* Well, then use something else. */ 308062306a36Sopenharmony_ci rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 308162306a36Sopenharmony_ci ? 
-1 : 1; 308262306a36Sopenharmony_ci break; 308362306a36Sopenharmony_ci case ASB_DISCARD_LOCAL: 308462306a36Sopenharmony_ci rv = -1; 308562306a36Sopenharmony_ci break; 308662306a36Sopenharmony_ci case ASB_DISCARD_REMOTE: 308762306a36Sopenharmony_ci rv = 1; 308862306a36Sopenharmony_ci } 308962306a36Sopenharmony_ci 309062306a36Sopenharmony_ci return rv; 309162306a36Sopenharmony_ci} 309262306a36Sopenharmony_ci 309362306a36Sopenharmony_ci/* 309462306a36Sopenharmony_ci * drbd_asb_recover_1p - Recover after split-brain with one remaining primary 309562306a36Sopenharmony_ci */ 309662306a36Sopenharmony_cistatic int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local) 309762306a36Sopenharmony_ci{ 309862306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 309962306a36Sopenharmony_ci int hg, rv = -100; 310062306a36Sopenharmony_ci enum drbd_after_sb_p after_sb_1p; 310162306a36Sopenharmony_ci 310262306a36Sopenharmony_ci rcu_read_lock(); 310362306a36Sopenharmony_ci after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p; 310462306a36Sopenharmony_ci rcu_read_unlock(); 310562306a36Sopenharmony_ci switch (after_sb_1p) { 310662306a36Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 310762306a36Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 310862306a36Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 310962306a36Sopenharmony_ci case ASB_DISCARD_LOCAL: 311062306a36Sopenharmony_ci case ASB_DISCARD_REMOTE: 311162306a36Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 311262306a36Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 311362306a36Sopenharmony_ci break; 311462306a36Sopenharmony_ci case ASB_DISCONNECT: 311562306a36Sopenharmony_ci break; 311662306a36Sopenharmony_ci case ASB_CONSENSUS: 311762306a36Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 311862306a36Sopenharmony_ci if (hg == -1 && device->state.role == R_SECONDARY) 311962306a36Sopenharmony_ci rv = hg; 312062306a36Sopenharmony_ci if (hg == 1 && device->state.role 
== R_PRIMARY) 312162306a36Sopenharmony_ci rv = hg; 312262306a36Sopenharmony_ci break; 312362306a36Sopenharmony_ci case ASB_VIOLENTLY: 312462306a36Sopenharmony_ci rv = drbd_asb_recover_0p(peer_device); 312562306a36Sopenharmony_ci break; 312662306a36Sopenharmony_ci case ASB_DISCARD_SECONDARY: 312762306a36Sopenharmony_ci return device->state.role == R_PRIMARY ? 1 : -1; 312862306a36Sopenharmony_ci case ASB_CALL_HELPER: 312962306a36Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 313062306a36Sopenharmony_ci if (hg == -1 && device->state.role == R_PRIMARY) { 313162306a36Sopenharmony_ci enum drbd_state_rv rv2; 313262306a36Sopenharmony_ci 313362306a36Sopenharmony_ci /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 313462306a36Sopenharmony_ci * we might be here in C_WF_REPORT_PARAMS which is transient. 313562306a36Sopenharmony_ci * we do not need to wait for the after state change work either. */ 313662306a36Sopenharmony_ci rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 313762306a36Sopenharmony_ci if (rv2 != SS_SUCCESS) { 313862306a36Sopenharmony_ci drbd_khelper(device, "pri-lost-after-sb"); 313962306a36Sopenharmony_ci } else { 314062306a36Sopenharmony_ci drbd_warn(device, "Successfully gave up primary role.\n"); 314162306a36Sopenharmony_ci rv = hg; 314262306a36Sopenharmony_ci } 314362306a36Sopenharmony_ci } else 314462306a36Sopenharmony_ci rv = hg; 314562306a36Sopenharmony_ci } 314662306a36Sopenharmony_ci 314762306a36Sopenharmony_ci return rv; 314862306a36Sopenharmony_ci} 314962306a36Sopenharmony_ci 315062306a36Sopenharmony_ci/* 315162306a36Sopenharmony_ci * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries 315262306a36Sopenharmony_ci */ 315362306a36Sopenharmony_cistatic int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local) 315462306a36Sopenharmony_ci{ 315562306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 315662306a36Sopenharmony_ci int hg, rv = 
-100; 315762306a36Sopenharmony_ci enum drbd_after_sb_p after_sb_2p; 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci rcu_read_lock(); 316062306a36Sopenharmony_ci after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p; 316162306a36Sopenharmony_ci rcu_read_unlock(); 316262306a36Sopenharmony_ci switch (after_sb_2p) { 316362306a36Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 316462306a36Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 316562306a36Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 316662306a36Sopenharmony_ci case ASB_DISCARD_LOCAL: 316762306a36Sopenharmony_ci case ASB_DISCARD_REMOTE: 316862306a36Sopenharmony_ci case ASB_CONSENSUS: 316962306a36Sopenharmony_ci case ASB_DISCARD_SECONDARY: 317062306a36Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 317162306a36Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 317262306a36Sopenharmony_ci break; 317362306a36Sopenharmony_ci case ASB_VIOLENTLY: 317462306a36Sopenharmony_ci rv = drbd_asb_recover_0p(peer_device); 317562306a36Sopenharmony_ci break; 317662306a36Sopenharmony_ci case ASB_DISCONNECT: 317762306a36Sopenharmony_ci break; 317862306a36Sopenharmony_ci case ASB_CALL_HELPER: 317962306a36Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 318062306a36Sopenharmony_ci if (hg == -1) { 318162306a36Sopenharmony_ci enum drbd_state_rv rv2; 318262306a36Sopenharmony_ci 318362306a36Sopenharmony_ci /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 318462306a36Sopenharmony_ci * we might be here in C_WF_REPORT_PARAMS which is transient. 318562306a36Sopenharmony_ci * we do not need to wait for the after state change work either. 
*/ 318662306a36Sopenharmony_ci rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 318762306a36Sopenharmony_ci if (rv2 != SS_SUCCESS) { 318862306a36Sopenharmony_ci drbd_khelper(device, "pri-lost-after-sb"); 318962306a36Sopenharmony_ci } else { 319062306a36Sopenharmony_ci drbd_warn(device, "Successfully gave up primary role.\n"); 319162306a36Sopenharmony_ci rv = hg; 319262306a36Sopenharmony_ci } 319362306a36Sopenharmony_ci } else 319462306a36Sopenharmony_ci rv = hg; 319562306a36Sopenharmony_ci } 319662306a36Sopenharmony_ci 319762306a36Sopenharmony_ci return rv; 319862306a36Sopenharmony_ci} 319962306a36Sopenharmony_ci 320062306a36Sopenharmony_cistatic void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, 320162306a36Sopenharmony_ci u64 bits, u64 flags) 320262306a36Sopenharmony_ci{ 320362306a36Sopenharmony_ci if (!uuid) { 320462306a36Sopenharmony_ci drbd_info(device, "%s uuid info vanished while I was looking!\n", text); 320562306a36Sopenharmony_ci return; 320662306a36Sopenharmony_ci } 320762306a36Sopenharmony_ci drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", 320862306a36Sopenharmony_ci text, 320962306a36Sopenharmony_ci (unsigned long long)uuid[UI_CURRENT], 321062306a36Sopenharmony_ci (unsigned long long)uuid[UI_BITMAP], 321162306a36Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_START], 321262306a36Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_END], 321362306a36Sopenharmony_ci (unsigned long long)bits, 321462306a36Sopenharmony_ci (unsigned long long)flags); 321562306a36Sopenharmony_ci} 321662306a36Sopenharmony_ci 321762306a36Sopenharmony_ci/* 321862306a36Sopenharmony_ci 100 after split brain try auto recover 321962306a36Sopenharmony_ci 2 C_SYNC_SOURCE set BitMap 322062306a36Sopenharmony_ci 1 C_SYNC_SOURCE use BitMap 322162306a36Sopenharmony_ci 0 no Sync 322262306a36Sopenharmony_ci -1 C_SYNC_TARGET use BitMap 322362306a36Sopenharmony_ci -2 C_SYNC_TARGET set BitMap 322462306a36Sopenharmony_ci 
 -100	after split brain, disconnect
-1000	unrelated data
-1091   requires proto 91
-1096   requires proto 96
 */

/*
 * drbd_uuid_compare  -  derive the resync direction from both sides' UUIDs
 *
 * Compares the local UUID set (device->ldev->md.uuid) with the peer's
 * (device->p_uuid); the lowest bit of every UUID is masked out before
 * comparing.  The number of the rule that produced the result is stored in
 * @rule_nr, and the result is one of the codes in the table above this
 * function.
 *
 * One additional result exists:
 * -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8)) is returned by rule 41
 * when the peer lacks DRBD_FF_WSAME.
 *
 * Note that some rules repair the local or the peer UUID array in place
 * before returning (rules 34, 35, 51 and 71).
 */
static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
		enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
	struct drbd_connection *const connection = peer_device->connection;
	struct drbd_device *device = peer_device->device;
	u64 self, peer;
	int i, j;

	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides are freshly created, nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh (or have no current UUID at all):
	 * become sync target, full sync. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh (or has no current UUID):
	 * become sync source, full sync. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* We still have a bitmap UUID, the peer does not. */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				/* Rule 34: rotate our bitmap UUID into the
				 * history, as the peer already did. */
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Mirror case: the peer still has a bitmap UUID, we do not. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				/* Rule 35: rotate the peer's bitmap UUID into
				 * its history in our copy of the peer UUIDs. */
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		/* Neither has the "crashed primary" flag set,
		 * only a replication link hiccup. */
		if (rct == 0)
			return 0;

		/* Current UUID equal and no bitmap uuid; does not necessarily
		 * mean this was a "simultaneous hard crash", maybe IO was
		 * frozen, so no UUID-bump happened.
		 * This is a protocol change, overload DRBD_FF_WSAME as flag
		 * for "new-enough" peer DRBD version. */
		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
			*rule_nr = 41;
			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
				/* Encodes the required protocol version in the
				 * low byte and the required feature flag in
				 * the next byte; decoded again by
				 * drbd_sync_handshake(). */
				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
			}
			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
				/* At least one has the "crashed primary" bit set,
				 * both are primary now, but neither has rotated its UUIDs?
				 * "Can not happen." */
				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
				return -100;
			}
			/* Exactly one side is primary now; it is the sync source. */
			if (device->state.role == R_PRIMARY)
				return 1;
			return -1;
		}

		/* Both are secondary.
		 * Really looks like recovery from simultaneous hard crash.
		 * Check which had been primary before, and arbitrate. */
		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID equals the peer's bitmap UUID:
	 * become sync target, bitmap based. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current UUID equals the peer's newest history UUID. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID shows up anywhere in the peer's history:
	 * become sync target, full sync. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rule 70: our bitmap UUID equals the peer's current UUID:
	 * become sync source, bitmap based. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* Rule 71: our newest history UUID equals the peer's current UUID
	 * (mirror image of rule 51). */
	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Rule 80: the peer's current UUID shows up anywhere in our history:
	 * become sync source, full sync. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: equal, non-zero bitmap UUIDs: split brain, the caller
	 * may try automatic recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any history UUID in common: split brain, disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* Nothing in common at all: unrelated data. */
	return -1000;
}

/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
344862306a36Sopenharmony_ci */ 344962306a36Sopenharmony_cistatic enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, 345062306a36Sopenharmony_ci enum drbd_role peer_role, 345162306a36Sopenharmony_ci enum drbd_disk_state peer_disk) __must_hold(local) 345262306a36Sopenharmony_ci{ 345362306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 345462306a36Sopenharmony_ci enum drbd_conns rv = C_MASK; 345562306a36Sopenharmony_ci enum drbd_disk_state mydisk; 345662306a36Sopenharmony_ci struct net_conf *nc; 345762306a36Sopenharmony_ci int hg, rule_nr, rr_conflict, tentative, always_asbp; 345862306a36Sopenharmony_ci 345962306a36Sopenharmony_ci mydisk = device->state.disk; 346062306a36Sopenharmony_ci if (mydisk == D_NEGOTIATING) 346162306a36Sopenharmony_ci mydisk = device->new_state_tmp.disk; 346262306a36Sopenharmony_ci 346362306a36Sopenharmony_ci drbd_info(device, "drbd_sync_handshake:\n"); 346462306a36Sopenharmony_ci 346562306a36Sopenharmony_ci spin_lock_irq(&device->ldev->md.uuid_lock); 346662306a36Sopenharmony_ci drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0); 346762306a36Sopenharmony_ci drbd_uuid_dump(device, "peer", device->p_uuid, 346862306a36Sopenharmony_ci device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 346962306a36Sopenharmony_ci 347062306a36Sopenharmony_ci hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr); 347162306a36Sopenharmony_ci spin_unlock_irq(&device->ldev->md.uuid_lock); 347262306a36Sopenharmony_ci 347362306a36Sopenharmony_ci drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); 347462306a36Sopenharmony_ci 347562306a36Sopenharmony_ci if (hg == -1000) { 347662306a36Sopenharmony_ci drbd_alert(device, "Unrelated data, aborting!\n"); 347762306a36Sopenharmony_ci return C_MASK; 347862306a36Sopenharmony_ci } 347962306a36Sopenharmony_ci if (hg < -0x10000) { 348062306a36Sopenharmony_ci int proto, fflags; 348162306a36Sopenharmony_ci hg = -hg; 348262306a36Sopenharmony_ci proto 
= hg & 0xff; 348362306a36Sopenharmony_ci fflags = (hg >> 8) & 0xff; 348462306a36Sopenharmony_ci drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n", 348562306a36Sopenharmony_ci proto, fflags); 348662306a36Sopenharmony_ci return C_MASK; 348762306a36Sopenharmony_ci } 348862306a36Sopenharmony_ci if (hg < -1000) { 348962306a36Sopenharmony_ci drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); 349062306a36Sopenharmony_ci return C_MASK; 349162306a36Sopenharmony_ci } 349262306a36Sopenharmony_ci 349362306a36Sopenharmony_ci if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) || 349462306a36Sopenharmony_ci (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) { 349562306a36Sopenharmony_ci int f = (hg == -100) || abs(hg) == 2; 349662306a36Sopenharmony_ci hg = mydisk > D_INCONSISTENT ? 1 : -1; 349762306a36Sopenharmony_ci if (f) 349862306a36Sopenharmony_ci hg = hg*2; 349962306a36Sopenharmony_ci drbd_info(device, "Becoming sync %s due to disk states.\n", 350062306a36Sopenharmony_ci hg > 0 ? 
"source" : "target"); 350162306a36Sopenharmony_ci } 350262306a36Sopenharmony_ci 350362306a36Sopenharmony_ci if (abs(hg) == 100) 350462306a36Sopenharmony_ci drbd_khelper(device, "initial-split-brain"); 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ci rcu_read_lock(); 350762306a36Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 350862306a36Sopenharmony_ci always_asbp = nc->always_asbp; 350962306a36Sopenharmony_ci rr_conflict = nc->rr_conflict; 351062306a36Sopenharmony_ci tentative = nc->tentative; 351162306a36Sopenharmony_ci rcu_read_unlock(); 351262306a36Sopenharmony_ci 351362306a36Sopenharmony_ci if (hg == 100 || (hg == -100 && always_asbp)) { 351462306a36Sopenharmony_ci int pcount = (device->state.role == R_PRIMARY) 351562306a36Sopenharmony_ci + (peer_role == R_PRIMARY); 351662306a36Sopenharmony_ci int forced = (hg == -100); 351762306a36Sopenharmony_ci 351862306a36Sopenharmony_ci switch (pcount) { 351962306a36Sopenharmony_ci case 0: 352062306a36Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 352162306a36Sopenharmony_ci break; 352262306a36Sopenharmony_ci case 1: 352362306a36Sopenharmony_ci hg = drbd_asb_recover_1p(peer_device); 352462306a36Sopenharmony_ci break; 352562306a36Sopenharmony_ci case 2: 352662306a36Sopenharmony_ci hg = drbd_asb_recover_2p(peer_device); 352762306a36Sopenharmony_ci break; 352862306a36Sopenharmony_ci } 352962306a36Sopenharmony_ci if (abs(hg) < 100) { 353062306a36Sopenharmony_ci drbd_warn(device, "Split-Brain detected, %d primaries, " 353162306a36Sopenharmony_ci "automatically solved. Sync from %s node\n", 353262306a36Sopenharmony_ci pcount, (hg < 0) ? 
"peer" : "this"); 353362306a36Sopenharmony_ci if (forced) { 353462306a36Sopenharmony_ci drbd_warn(device, "Doing a full sync, since" 353562306a36Sopenharmony_ci " UUIDs where ambiguous.\n"); 353662306a36Sopenharmony_ci hg = hg*2; 353762306a36Sopenharmony_ci } 353862306a36Sopenharmony_ci } 353962306a36Sopenharmony_ci } 354062306a36Sopenharmony_ci 354162306a36Sopenharmony_ci if (hg == -100) { 354262306a36Sopenharmony_ci if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1)) 354362306a36Sopenharmony_ci hg = -1; 354462306a36Sopenharmony_ci if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1)) 354562306a36Sopenharmony_ci hg = 1; 354662306a36Sopenharmony_ci 354762306a36Sopenharmony_ci if (abs(hg) < 100) 354862306a36Sopenharmony_ci drbd_warn(device, "Split-Brain detected, manually solved. " 354962306a36Sopenharmony_ci "Sync from %s node\n", 355062306a36Sopenharmony_ci (hg < 0) ? "peer" : "this"); 355162306a36Sopenharmony_ci } 355262306a36Sopenharmony_ci 355362306a36Sopenharmony_ci if (hg == -100) { 355462306a36Sopenharmony_ci /* FIXME this log message is not correct if we end up here 355562306a36Sopenharmony_ci * after an attempted attach on a diskless node. 355662306a36Sopenharmony_ci * We just refuse to attach -- well, we drop the "connection" 355762306a36Sopenharmony_ci * to that disk, in a way... 
*/ 355862306a36Sopenharmony_ci drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n"); 355962306a36Sopenharmony_ci drbd_khelper(device, "split-brain"); 356062306a36Sopenharmony_ci return C_MASK; 356162306a36Sopenharmony_ci } 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_ci if (hg > 0 && mydisk <= D_INCONSISTENT) { 356462306a36Sopenharmony_ci drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n"); 356562306a36Sopenharmony_ci return C_MASK; 356662306a36Sopenharmony_ci } 356762306a36Sopenharmony_ci 356862306a36Sopenharmony_ci if (hg < 0 && /* by intention we do not use mydisk here. */ 356962306a36Sopenharmony_ci device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) { 357062306a36Sopenharmony_ci switch (rr_conflict) { 357162306a36Sopenharmony_ci case ASB_CALL_HELPER: 357262306a36Sopenharmony_ci drbd_khelper(device, "pri-lost"); 357362306a36Sopenharmony_ci fallthrough; 357462306a36Sopenharmony_ci case ASB_DISCONNECT: 357562306a36Sopenharmony_ci drbd_err(device, "I shall become SyncTarget, but I am primary!\n"); 357662306a36Sopenharmony_ci return C_MASK; 357762306a36Sopenharmony_ci case ASB_VIOLENTLY: 357862306a36Sopenharmony_ci drbd_warn(device, "Becoming SyncTarget, violating the stable-data" 357962306a36Sopenharmony_ci "assumption\n"); 358062306a36Sopenharmony_ci } 358162306a36Sopenharmony_ci } 358262306a36Sopenharmony_ci 358362306a36Sopenharmony_ci if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) { 358462306a36Sopenharmony_ci if (hg == 0) 358562306a36Sopenharmony_ci drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n"); 358662306a36Sopenharmony_ci else 358762306a36Sopenharmony_ci drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.", 358862306a36Sopenharmony_ci drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), 358962306a36Sopenharmony_ci abs(hg) >= 2 ? 
"full" : "bit-map based"); 359062306a36Sopenharmony_ci return C_MASK; 359162306a36Sopenharmony_ci } 359262306a36Sopenharmony_ci 359362306a36Sopenharmony_ci if (abs(hg) >= 2) { 359462306a36Sopenharmony_ci drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 359562306a36Sopenharmony_ci if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", 359662306a36Sopenharmony_ci BM_LOCKED_SET_ALLOWED, NULL)) 359762306a36Sopenharmony_ci return C_MASK; 359862306a36Sopenharmony_ci } 359962306a36Sopenharmony_ci 360062306a36Sopenharmony_ci if (hg > 0) { /* become sync source. */ 360162306a36Sopenharmony_ci rv = C_WF_BITMAP_S; 360262306a36Sopenharmony_ci } else if (hg < 0) { /* become sync target */ 360362306a36Sopenharmony_ci rv = C_WF_BITMAP_T; 360462306a36Sopenharmony_ci } else { 360562306a36Sopenharmony_ci rv = C_CONNECTED; 360662306a36Sopenharmony_ci if (drbd_bm_total_weight(device)) { 360762306a36Sopenharmony_ci drbd_info(device, "No resync, but %lu bits in bitmap!\n", 360862306a36Sopenharmony_ci drbd_bm_total_weight(device)); 360962306a36Sopenharmony_ci } 361062306a36Sopenharmony_ci } 361162306a36Sopenharmony_ci 361262306a36Sopenharmony_ci return rv; 361362306a36Sopenharmony_ci} 361462306a36Sopenharmony_ci 361562306a36Sopenharmony_cistatic enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) 361662306a36Sopenharmony_ci{ 361762306a36Sopenharmony_ci /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ 361862306a36Sopenharmony_ci if (peer == ASB_DISCARD_REMOTE) 361962306a36Sopenharmony_ci return ASB_DISCARD_LOCAL; 362062306a36Sopenharmony_ci 362162306a36Sopenharmony_ci /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ 362262306a36Sopenharmony_ci if (peer == ASB_DISCARD_LOCAL) 362362306a36Sopenharmony_ci return ASB_DISCARD_REMOTE; 362462306a36Sopenharmony_ci 362562306a36Sopenharmony_ci /* everything else is valid if they are equal on both sides. 
*/ 362662306a36Sopenharmony_ci return peer; 362762306a36Sopenharmony_ci} 362862306a36Sopenharmony_ci 362962306a36Sopenharmony_cistatic int receive_protocol(struct drbd_connection *connection, struct packet_info *pi) 363062306a36Sopenharmony_ci{ 363162306a36Sopenharmony_ci struct p_protocol *p = pi->data; 363262306a36Sopenharmony_ci enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 363362306a36Sopenharmony_ci int p_proto, p_discard_my_data, p_two_primaries, cf; 363462306a36Sopenharmony_ci struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; 363562306a36Sopenharmony_ci char integrity_alg[SHARED_SECRET_MAX] = ""; 363662306a36Sopenharmony_ci struct crypto_shash *peer_integrity_tfm = NULL; 363762306a36Sopenharmony_ci void *int_dig_in = NULL, *int_dig_vv = NULL; 363862306a36Sopenharmony_ci 363962306a36Sopenharmony_ci p_proto = be32_to_cpu(p->protocol); 364062306a36Sopenharmony_ci p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 364162306a36Sopenharmony_ci p_after_sb_1p = be32_to_cpu(p->after_sb_1p); 364262306a36Sopenharmony_ci p_after_sb_2p = be32_to_cpu(p->after_sb_2p); 364362306a36Sopenharmony_ci p_two_primaries = be32_to_cpu(p->two_primaries); 364462306a36Sopenharmony_ci cf = be32_to_cpu(p->conn_flags); 364562306a36Sopenharmony_ci p_discard_my_data = cf & CF_DISCARD_MY_DATA; 364662306a36Sopenharmony_ci 364762306a36Sopenharmony_ci if (connection->agreed_pro_version >= 87) { 364862306a36Sopenharmony_ci int err; 364962306a36Sopenharmony_ci 365062306a36Sopenharmony_ci if (pi->size > sizeof(integrity_alg)) 365162306a36Sopenharmony_ci return -EIO; 365262306a36Sopenharmony_ci err = drbd_recv_all(connection, integrity_alg, pi->size); 365362306a36Sopenharmony_ci if (err) 365462306a36Sopenharmony_ci return err; 365562306a36Sopenharmony_ci integrity_alg[SHARED_SECRET_MAX - 1] = 0; 365662306a36Sopenharmony_ci } 365762306a36Sopenharmony_ci 365862306a36Sopenharmony_ci if (pi->cmd != P_PROTOCOL_UPDATE) { 365962306a36Sopenharmony_ci clear_bit(CONN_DRY_RUN, 
&connection->flags); 366062306a36Sopenharmony_ci 366162306a36Sopenharmony_ci if (cf & CF_DRY_RUN) 366262306a36Sopenharmony_ci set_bit(CONN_DRY_RUN, &connection->flags); 366362306a36Sopenharmony_ci 366462306a36Sopenharmony_ci rcu_read_lock(); 366562306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 366662306a36Sopenharmony_ci 366762306a36Sopenharmony_ci if (p_proto != nc->wire_protocol) { 366862306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "protocol"); 366962306a36Sopenharmony_ci goto disconnect_rcu_unlock; 367062306a36Sopenharmony_ci } 367162306a36Sopenharmony_ci 367262306a36Sopenharmony_ci if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { 367362306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri"); 367462306a36Sopenharmony_ci goto disconnect_rcu_unlock; 367562306a36Sopenharmony_ci } 367662306a36Sopenharmony_ci 367762306a36Sopenharmony_ci if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { 367862306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri"); 367962306a36Sopenharmony_ci goto disconnect_rcu_unlock; 368062306a36Sopenharmony_ci } 368162306a36Sopenharmony_ci 368262306a36Sopenharmony_ci if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { 368362306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri"); 368462306a36Sopenharmony_ci goto disconnect_rcu_unlock; 368562306a36Sopenharmony_ci } 368662306a36Sopenharmony_ci 368762306a36Sopenharmony_ci if (p_discard_my_data && nc->discard_my_data) { 368862306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "discard-my-data"); 368962306a36Sopenharmony_ci goto disconnect_rcu_unlock; 369062306a36Sopenharmony_ci } 369162306a36Sopenharmony_ci 369262306a36Sopenharmony_ci if (p_two_primaries != nc->two_primaries) { 369362306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries"); 369462306a36Sopenharmony_ci goto 
disconnect_rcu_unlock; 369562306a36Sopenharmony_ci } 369662306a36Sopenharmony_ci 369762306a36Sopenharmony_ci if (strcmp(integrity_alg, nc->integrity_alg)) { 369862306a36Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg"); 369962306a36Sopenharmony_ci goto disconnect_rcu_unlock; 370062306a36Sopenharmony_ci } 370162306a36Sopenharmony_ci 370262306a36Sopenharmony_ci rcu_read_unlock(); 370362306a36Sopenharmony_ci } 370462306a36Sopenharmony_ci 370562306a36Sopenharmony_ci if (integrity_alg[0]) { 370662306a36Sopenharmony_ci int hash_size; 370762306a36Sopenharmony_ci 370862306a36Sopenharmony_ci /* 370962306a36Sopenharmony_ci * We can only change the peer data integrity algorithm 371062306a36Sopenharmony_ci * here. Changing our own data integrity algorithm 371162306a36Sopenharmony_ci * requires that we send a P_PROTOCOL_UPDATE packet at 371262306a36Sopenharmony_ci * the same time; otherwise, the peer has no way to 371362306a36Sopenharmony_ci * tell between which packets the algorithm should 371462306a36Sopenharmony_ci * change. 
371562306a36Sopenharmony_ci */ 371662306a36Sopenharmony_ci 371762306a36Sopenharmony_ci peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0); 371862306a36Sopenharmony_ci if (IS_ERR(peer_integrity_tfm)) { 371962306a36Sopenharmony_ci peer_integrity_tfm = NULL; 372062306a36Sopenharmony_ci drbd_err(connection, "peer data-integrity-alg %s not supported\n", 372162306a36Sopenharmony_ci integrity_alg); 372262306a36Sopenharmony_ci goto disconnect; 372362306a36Sopenharmony_ci } 372462306a36Sopenharmony_ci 372562306a36Sopenharmony_ci hash_size = crypto_shash_digestsize(peer_integrity_tfm); 372662306a36Sopenharmony_ci int_dig_in = kmalloc(hash_size, GFP_KERNEL); 372762306a36Sopenharmony_ci int_dig_vv = kmalloc(hash_size, GFP_KERNEL); 372862306a36Sopenharmony_ci if (!(int_dig_in && int_dig_vv)) { 372962306a36Sopenharmony_ci drbd_err(connection, "Allocation of buffers for data integrity checking failed\n"); 373062306a36Sopenharmony_ci goto disconnect; 373162306a36Sopenharmony_ci } 373262306a36Sopenharmony_ci } 373362306a36Sopenharmony_ci 373462306a36Sopenharmony_ci new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); 373562306a36Sopenharmony_ci if (!new_net_conf) 373662306a36Sopenharmony_ci goto disconnect; 373762306a36Sopenharmony_ci 373862306a36Sopenharmony_ci mutex_lock(&connection->data.mutex); 373962306a36Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 374062306a36Sopenharmony_ci old_net_conf = connection->net_conf; 374162306a36Sopenharmony_ci *new_net_conf = *old_net_conf; 374262306a36Sopenharmony_ci 374362306a36Sopenharmony_ci new_net_conf->wire_protocol = p_proto; 374462306a36Sopenharmony_ci new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); 374562306a36Sopenharmony_ci new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); 374662306a36Sopenharmony_ci new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); 374762306a36Sopenharmony_ci new_net_conf->two_primaries = p_two_primaries; 374862306a36Sopenharmony_ci 
374962306a36Sopenharmony_ci rcu_assign_pointer(connection->net_conf, new_net_conf); 375062306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 375162306a36Sopenharmony_ci mutex_unlock(&connection->data.mutex); 375262306a36Sopenharmony_ci 375362306a36Sopenharmony_ci crypto_free_shash(connection->peer_integrity_tfm); 375462306a36Sopenharmony_ci kfree(connection->int_dig_in); 375562306a36Sopenharmony_ci kfree(connection->int_dig_vv); 375662306a36Sopenharmony_ci connection->peer_integrity_tfm = peer_integrity_tfm; 375762306a36Sopenharmony_ci connection->int_dig_in = int_dig_in; 375862306a36Sopenharmony_ci connection->int_dig_vv = int_dig_vv; 375962306a36Sopenharmony_ci 376062306a36Sopenharmony_ci if (strcmp(old_net_conf->integrity_alg, integrity_alg)) 376162306a36Sopenharmony_ci drbd_info(connection, "peer data-integrity-alg: %s\n", 376262306a36Sopenharmony_ci integrity_alg[0] ? integrity_alg : "(none)"); 376362306a36Sopenharmony_ci 376462306a36Sopenharmony_ci kvfree_rcu_mightsleep(old_net_conf); 376562306a36Sopenharmony_ci return 0; 376662306a36Sopenharmony_ci 376762306a36Sopenharmony_cidisconnect_rcu_unlock: 376862306a36Sopenharmony_ci rcu_read_unlock(); 376962306a36Sopenharmony_cidisconnect: 377062306a36Sopenharmony_ci crypto_free_shash(peer_integrity_tfm); 377162306a36Sopenharmony_ci kfree(int_dig_in); 377262306a36Sopenharmony_ci kfree(int_dig_vv); 377362306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 377462306a36Sopenharmony_ci return -EIO; 377562306a36Sopenharmony_ci} 377662306a36Sopenharmony_ci 377762306a36Sopenharmony_ci/* helper function 377862306a36Sopenharmony_ci * input: alg name, feature name 377962306a36Sopenharmony_ci * return: NULL (alg name was "") 378062306a36Sopenharmony_ci * ERR_PTR(error) if something goes wrong 378162306a36Sopenharmony_ci * or the crypto hash ptr, if it worked out ok. 
*/ 378262306a36Sopenharmony_cistatic struct crypto_shash *drbd_crypto_alloc_digest_safe( 378362306a36Sopenharmony_ci const struct drbd_device *device, 378462306a36Sopenharmony_ci const char *alg, const char *name) 378562306a36Sopenharmony_ci{ 378662306a36Sopenharmony_ci struct crypto_shash *tfm; 378762306a36Sopenharmony_ci 378862306a36Sopenharmony_ci if (!alg[0]) 378962306a36Sopenharmony_ci return NULL; 379062306a36Sopenharmony_ci 379162306a36Sopenharmony_ci tfm = crypto_alloc_shash(alg, 0, 0); 379262306a36Sopenharmony_ci if (IS_ERR(tfm)) { 379362306a36Sopenharmony_ci drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", 379462306a36Sopenharmony_ci alg, name, PTR_ERR(tfm)); 379562306a36Sopenharmony_ci return tfm; 379662306a36Sopenharmony_ci } 379762306a36Sopenharmony_ci return tfm; 379862306a36Sopenharmony_ci} 379962306a36Sopenharmony_ci 380062306a36Sopenharmony_cistatic int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi) 380162306a36Sopenharmony_ci{ 380262306a36Sopenharmony_ci void *buffer = connection->data.rbuf; 380362306a36Sopenharmony_ci int size = pi->size; 380462306a36Sopenharmony_ci 380562306a36Sopenharmony_ci while (size) { 380662306a36Sopenharmony_ci int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); 380762306a36Sopenharmony_ci s = drbd_recv(connection, buffer, s); 380862306a36Sopenharmony_ci if (s <= 0) { 380962306a36Sopenharmony_ci if (s < 0) 381062306a36Sopenharmony_ci return s; 381162306a36Sopenharmony_ci break; 381262306a36Sopenharmony_ci } 381362306a36Sopenharmony_ci size -= s; 381462306a36Sopenharmony_ci } 381562306a36Sopenharmony_ci if (size) 381662306a36Sopenharmony_ci return -EIO; 381762306a36Sopenharmony_ci return 0; 381862306a36Sopenharmony_ci} 381962306a36Sopenharmony_ci 382062306a36Sopenharmony_ci/* 382162306a36Sopenharmony_ci * config_unknown_volume - device configuration command for unknown volume 382262306a36Sopenharmony_ci * 382362306a36Sopenharmony_ci * When a device is added to an 
existing connection, the node on which the 382462306a36Sopenharmony_ci * device is added first will send configuration commands to its peer but the 382562306a36Sopenharmony_ci * peer will not know about the device yet. It will warn and ignore these 382662306a36Sopenharmony_ci * commands. Once the device is added on the second node, the second node will 382762306a36Sopenharmony_ci * send the same device configuration commands, but in the other direction. 382862306a36Sopenharmony_ci * 382962306a36Sopenharmony_ci * (We can also end up here if drbd is misconfigured.) 383062306a36Sopenharmony_ci */ 383162306a36Sopenharmony_cistatic int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi) 383262306a36Sopenharmony_ci{ 383362306a36Sopenharmony_ci drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n", 383462306a36Sopenharmony_ci cmdname(pi->cmd), pi->vnr); 383562306a36Sopenharmony_ci return ignore_remaining_packet(connection, pi); 383662306a36Sopenharmony_ci} 383762306a36Sopenharmony_ci 383862306a36Sopenharmony_cistatic int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi) 383962306a36Sopenharmony_ci{ 384062306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 384162306a36Sopenharmony_ci struct drbd_device *device; 384262306a36Sopenharmony_ci struct p_rs_param_95 *p; 384362306a36Sopenharmony_ci unsigned int header_size, data_size, exp_max_sz; 384462306a36Sopenharmony_ci struct crypto_shash *verify_tfm = NULL; 384562306a36Sopenharmony_ci struct crypto_shash *csums_tfm = NULL; 384662306a36Sopenharmony_ci struct net_conf *old_net_conf, *new_net_conf = NULL; 384762306a36Sopenharmony_ci struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; 384862306a36Sopenharmony_ci const int apv = connection->agreed_pro_version; 384962306a36Sopenharmony_ci struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 385062306a36Sopenharmony_ci unsigned int fifo_size = 0; 
385162306a36Sopenharmony_ci int err; 385262306a36Sopenharmony_ci 385362306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 385462306a36Sopenharmony_ci if (!peer_device) 385562306a36Sopenharmony_ci return config_unknown_volume(connection, pi); 385662306a36Sopenharmony_ci device = peer_device->device; 385762306a36Sopenharmony_ci 385862306a36Sopenharmony_ci exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 385962306a36Sopenharmony_ci : apv == 88 ? sizeof(struct p_rs_param) 386062306a36Sopenharmony_ci + SHARED_SECRET_MAX 386162306a36Sopenharmony_ci : apv <= 94 ? sizeof(struct p_rs_param_89) 386262306a36Sopenharmony_ci : /* apv >= 95 */ sizeof(struct p_rs_param_95); 386362306a36Sopenharmony_ci 386462306a36Sopenharmony_ci if (pi->size > exp_max_sz) { 386562306a36Sopenharmony_ci drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n", 386662306a36Sopenharmony_ci pi->size, exp_max_sz); 386762306a36Sopenharmony_ci return -EIO; 386862306a36Sopenharmony_ci } 386962306a36Sopenharmony_ci 387062306a36Sopenharmony_ci if (apv <= 88) { 387162306a36Sopenharmony_ci header_size = sizeof(struct p_rs_param); 387262306a36Sopenharmony_ci data_size = pi->size - header_size; 387362306a36Sopenharmony_ci } else if (apv <= 94) { 387462306a36Sopenharmony_ci header_size = sizeof(struct p_rs_param_89); 387562306a36Sopenharmony_ci data_size = pi->size - header_size; 387662306a36Sopenharmony_ci D_ASSERT(device, data_size == 0); 387762306a36Sopenharmony_ci } else { 387862306a36Sopenharmony_ci header_size = sizeof(struct p_rs_param_95); 387962306a36Sopenharmony_ci data_size = pi->size - header_size; 388062306a36Sopenharmony_ci D_ASSERT(device, data_size == 0); 388162306a36Sopenharmony_ci } 388262306a36Sopenharmony_ci 388362306a36Sopenharmony_ci /* initialize verify_alg and csums_alg */ 388462306a36Sopenharmony_ci p = pi->data; 388562306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX); 388662306a36Sopenharmony_ci 
memset(&p->algs, 0, sizeof(p->algs)); 388762306a36Sopenharmony_ci 388862306a36Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p, header_size); 388962306a36Sopenharmony_ci if (err) 389062306a36Sopenharmony_ci return err; 389162306a36Sopenharmony_ci 389262306a36Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 389362306a36Sopenharmony_ci old_net_conf = peer_device->connection->net_conf; 389462306a36Sopenharmony_ci if (get_ldev(device)) { 389562306a36Sopenharmony_ci new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 389662306a36Sopenharmony_ci if (!new_disk_conf) { 389762306a36Sopenharmony_ci put_ldev(device); 389862306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 389962306a36Sopenharmony_ci drbd_err(device, "Allocation of new disk_conf failed\n"); 390062306a36Sopenharmony_ci return -ENOMEM; 390162306a36Sopenharmony_ci } 390262306a36Sopenharmony_ci 390362306a36Sopenharmony_ci old_disk_conf = device->ldev->disk_conf; 390462306a36Sopenharmony_ci *new_disk_conf = *old_disk_conf; 390562306a36Sopenharmony_ci 390662306a36Sopenharmony_ci new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); 390762306a36Sopenharmony_ci } 390862306a36Sopenharmony_ci 390962306a36Sopenharmony_ci if (apv >= 88) { 391062306a36Sopenharmony_ci if (apv == 88) { 391162306a36Sopenharmony_ci if (data_size > SHARED_SECRET_MAX || data_size == 0) { 391262306a36Sopenharmony_ci drbd_err(device, "verify-alg of wrong size, " 391362306a36Sopenharmony_ci "peer wants %u, accepting only up to %u byte\n", 391462306a36Sopenharmony_ci data_size, SHARED_SECRET_MAX); 391562306a36Sopenharmony_ci goto reconnect; 391662306a36Sopenharmony_ci } 391762306a36Sopenharmony_ci 391862306a36Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size); 391962306a36Sopenharmony_ci if (err) 392062306a36Sopenharmony_ci goto reconnect; 392162306a36Sopenharmony_ci /* we expect NUL terminated string */ 392262306a36Sopenharmony_ci /* but just in 
case someone tries to be evil */ 392362306a36Sopenharmony_ci D_ASSERT(device, p->verify_alg[data_size-1] == 0); 392462306a36Sopenharmony_ci p->verify_alg[data_size-1] = 0; 392562306a36Sopenharmony_ci 392662306a36Sopenharmony_ci } else /* apv >= 89 */ { 392762306a36Sopenharmony_ci /* we still expect NUL terminated strings */ 392862306a36Sopenharmony_ci /* but just in case someone tries to be evil */ 392962306a36Sopenharmony_ci D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0); 393062306a36Sopenharmony_ci D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0); 393162306a36Sopenharmony_ci p->verify_alg[SHARED_SECRET_MAX-1] = 0; 393262306a36Sopenharmony_ci p->csums_alg[SHARED_SECRET_MAX-1] = 0; 393362306a36Sopenharmony_ci } 393462306a36Sopenharmony_ci 393562306a36Sopenharmony_ci if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { 393662306a36Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) { 393762306a36Sopenharmony_ci drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", 393862306a36Sopenharmony_ci old_net_conf->verify_alg, p->verify_alg); 393962306a36Sopenharmony_ci goto disconnect; 394062306a36Sopenharmony_ci } 394162306a36Sopenharmony_ci verify_tfm = drbd_crypto_alloc_digest_safe(device, 394262306a36Sopenharmony_ci p->verify_alg, "verify-alg"); 394362306a36Sopenharmony_ci if (IS_ERR(verify_tfm)) { 394462306a36Sopenharmony_ci verify_tfm = NULL; 394562306a36Sopenharmony_ci goto disconnect; 394662306a36Sopenharmony_ci } 394762306a36Sopenharmony_ci } 394862306a36Sopenharmony_ci 394962306a36Sopenharmony_ci if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { 395062306a36Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) { 395162306a36Sopenharmony_ci drbd_err(device, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", 395262306a36Sopenharmony_ci old_net_conf->csums_alg, p->csums_alg); 395362306a36Sopenharmony_ci goto disconnect; 395462306a36Sopenharmony_ci } 395562306a36Sopenharmony_ci csums_tfm = drbd_crypto_alloc_digest_safe(device, 395662306a36Sopenharmony_ci p->csums_alg, "csums-alg"); 395762306a36Sopenharmony_ci if (IS_ERR(csums_tfm)) { 395862306a36Sopenharmony_ci csums_tfm = NULL; 395962306a36Sopenharmony_ci goto disconnect; 396062306a36Sopenharmony_ci } 396162306a36Sopenharmony_ci } 396262306a36Sopenharmony_ci 396362306a36Sopenharmony_ci if (apv > 94 && new_disk_conf) { 396462306a36Sopenharmony_ci new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); 396562306a36Sopenharmony_ci new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); 396662306a36Sopenharmony_ci new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); 396762306a36Sopenharmony_ci new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); 396862306a36Sopenharmony_ci 396962306a36Sopenharmony_ci fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; 397062306a36Sopenharmony_ci if (fifo_size != device->rs_plan_s->size) { 397162306a36Sopenharmony_ci new_plan = fifo_alloc(fifo_size); 397262306a36Sopenharmony_ci if (!new_plan) { 397362306a36Sopenharmony_ci drbd_err(device, "kmalloc of fifo_buffer failed"); 397462306a36Sopenharmony_ci put_ldev(device); 397562306a36Sopenharmony_ci goto disconnect; 397662306a36Sopenharmony_ci } 397762306a36Sopenharmony_ci } 397862306a36Sopenharmony_ci } 397962306a36Sopenharmony_ci 398062306a36Sopenharmony_ci if (verify_tfm || csums_tfm) { 398162306a36Sopenharmony_ci new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 398262306a36Sopenharmony_ci if (!new_net_conf) 398362306a36Sopenharmony_ci goto disconnect; 398462306a36Sopenharmony_ci 398562306a36Sopenharmony_ci *new_net_conf = *old_net_conf; 398662306a36Sopenharmony_ci 398762306a36Sopenharmony_ci if (verify_tfm) { 398862306a36Sopenharmony_ci 
strcpy(new_net_conf->verify_alg, p->verify_alg); 398962306a36Sopenharmony_ci new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; 399062306a36Sopenharmony_ci crypto_free_shash(peer_device->connection->verify_tfm); 399162306a36Sopenharmony_ci peer_device->connection->verify_tfm = verify_tfm; 399262306a36Sopenharmony_ci drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); 399362306a36Sopenharmony_ci } 399462306a36Sopenharmony_ci if (csums_tfm) { 399562306a36Sopenharmony_ci strcpy(new_net_conf->csums_alg, p->csums_alg); 399662306a36Sopenharmony_ci new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; 399762306a36Sopenharmony_ci crypto_free_shash(peer_device->connection->csums_tfm); 399862306a36Sopenharmony_ci peer_device->connection->csums_tfm = csums_tfm; 399962306a36Sopenharmony_ci drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); 400062306a36Sopenharmony_ci } 400162306a36Sopenharmony_ci rcu_assign_pointer(connection->net_conf, new_net_conf); 400262306a36Sopenharmony_ci } 400362306a36Sopenharmony_ci } 400462306a36Sopenharmony_ci 400562306a36Sopenharmony_ci if (new_disk_conf) { 400662306a36Sopenharmony_ci rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 400762306a36Sopenharmony_ci put_ldev(device); 400862306a36Sopenharmony_ci } 400962306a36Sopenharmony_ci 401062306a36Sopenharmony_ci if (new_plan) { 401162306a36Sopenharmony_ci old_plan = device->rs_plan_s; 401262306a36Sopenharmony_ci rcu_assign_pointer(device->rs_plan_s, new_plan); 401362306a36Sopenharmony_ci } 401462306a36Sopenharmony_ci 401562306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 401662306a36Sopenharmony_ci synchronize_rcu(); 401762306a36Sopenharmony_ci if (new_net_conf) 401862306a36Sopenharmony_ci kfree(old_net_conf); 401962306a36Sopenharmony_ci kfree(old_disk_conf); 402062306a36Sopenharmony_ci kfree(old_plan); 402162306a36Sopenharmony_ci 402262306a36Sopenharmony_ci return 0; 402362306a36Sopenharmony_ci 402462306a36Sopenharmony_cireconnect: 
402562306a36Sopenharmony_ci if (new_disk_conf) { 402662306a36Sopenharmony_ci put_ldev(device); 402762306a36Sopenharmony_ci kfree(new_disk_conf); 402862306a36Sopenharmony_ci } 402962306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 403062306a36Sopenharmony_ci return -EIO; 403162306a36Sopenharmony_ci 403262306a36Sopenharmony_cidisconnect: 403362306a36Sopenharmony_ci kfree(new_plan); 403462306a36Sopenharmony_ci if (new_disk_conf) { 403562306a36Sopenharmony_ci put_ldev(device); 403662306a36Sopenharmony_ci kfree(new_disk_conf); 403762306a36Sopenharmony_ci } 403862306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 403962306a36Sopenharmony_ci /* just for completeness: actually not needed, 404062306a36Sopenharmony_ci * as this is not reached if csums_tfm was ok. */ 404162306a36Sopenharmony_ci crypto_free_shash(csums_tfm); 404262306a36Sopenharmony_ci /* but free the verify_tfm again, if csums_tfm did not work out */ 404362306a36Sopenharmony_ci crypto_free_shash(verify_tfm); 404462306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 404562306a36Sopenharmony_ci return -EIO; 404662306a36Sopenharmony_ci} 404762306a36Sopenharmony_ci 404862306a36Sopenharmony_ci/* warn if the arguments differ by more than 12.5% */ 404962306a36Sopenharmony_cistatic void warn_if_differ_considerably(struct drbd_device *device, 405062306a36Sopenharmony_ci const char *s, sector_t a, sector_t b) 405162306a36Sopenharmony_ci{ 405262306a36Sopenharmony_ci sector_t d; 405362306a36Sopenharmony_ci if (a == 0 || b == 0) 405462306a36Sopenharmony_ci return; 405562306a36Sopenharmony_ci d = (a > b) ? (a - b) : (b - a); 405662306a36Sopenharmony_ci if (d > (a>>3) || d > (b>>3)) 405762306a36Sopenharmony_ci drbd_warn(device, "Considerable difference in %s: %llus vs. 
%llus\n", s, 405862306a36Sopenharmony_ci (unsigned long long)a, (unsigned long long)b); 405962306a36Sopenharmony_ci} 406062306a36Sopenharmony_ci 406162306a36Sopenharmony_cistatic int receive_sizes(struct drbd_connection *connection, struct packet_info *pi) 406262306a36Sopenharmony_ci{ 406362306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 406462306a36Sopenharmony_ci struct drbd_device *device; 406562306a36Sopenharmony_ci struct p_sizes *p = pi->data; 406662306a36Sopenharmony_ci struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; 406762306a36Sopenharmony_ci enum determine_dev_size dd = DS_UNCHANGED; 406862306a36Sopenharmony_ci sector_t p_size, p_usize, p_csize, my_usize; 406962306a36Sopenharmony_ci sector_t new_size, cur_size; 407062306a36Sopenharmony_ci int ldsc = 0; /* local disk size changed */ 407162306a36Sopenharmony_ci enum dds_flags ddsf; 407262306a36Sopenharmony_ci 407362306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 407462306a36Sopenharmony_ci if (!peer_device) 407562306a36Sopenharmony_ci return config_unknown_volume(connection, pi); 407662306a36Sopenharmony_ci device = peer_device->device; 407762306a36Sopenharmony_ci cur_size = get_capacity(device->vdisk); 407862306a36Sopenharmony_ci 407962306a36Sopenharmony_ci p_size = be64_to_cpu(p->d_size); 408062306a36Sopenharmony_ci p_usize = be64_to_cpu(p->u_size); 408162306a36Sopenharmony_ci p_csize = be64_to_cpu(p->c_size); 408262306a36Sopenharmony_ci 408362306a36Sopenharmony_ci /* just store the peer's disk size for now. 408462306a36Sopenharmony_ci * we still need to figure out whether we accept that. 
*/ 408562306a36Sopenharmony_ci device->p_size = p_size; 408662306a36Sopenharmony_ci 408762306a36Sopenharmony_ci if (get_ldev(device)) { 408862306a36Sopenharmony_ci rcu_read_lock(); 408962306a36Sopenharmony_ci my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 409062306a36Sopenharmony_ci rcu_read_unlock(); 409162306a36Sopenharmony_ci 409262306a36Sopenharmony_ci warn_if_differ_considerably(device, "lower level device sizes", 409362306a36Sopenharmony_ci p_size, drbd_get_max_capacity(device->ldev)); 409462306a36Sopenharmony_ci warn_if_differ_considerably(device, "user requested size", 409562306a36Sopenharmony_ci p_usize, my_usize); 409662306a36Sopenharmony_ci 409762306a36Sopenharmony_ci /* if this is the first connect, or an otherwise expected 409862306a36Sopenharmony_ci * param exchange, choose the minimum */ 409962306a36Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) 410062306a36Sopenharmony_ci p_usize = min_not_zero(my_usize, p_usize); 410162306a36Sopenharmony_ci 410262306a36Sopenharmony_ci /* Never shrink a device with usable data during connect, 410362306a36Sopenharmony_ci * or "attach" on the peer. 410462306a36Sopenharmony_ci * But allow online shrinking if we are connected. */ 410562306a36Sopenharmony_ci new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); 410662306a36Sopenharmony_ci if (new_size < cur_size && 410762306a36Sopenharmony_ci device->state.disk >= D_OUTDATED && 410862306a36Sopenharmony_ci (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) { 410962306a36Sopenharmony_ci drbd_err(device, "The peer's disk size is too small! 
(%llu < %llu sectors)\n", 411062306a36Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 411162306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 411262306a36Sopenharmony_ci put_ldev(device); 411362306a36Sopenharmony_ci return -EIO; 411462306a36Sopenharmony_ci } 411562306a36Sopenharmony_ci 411662306a36Sopenharmony_ci if (my_usize != p_usize) { 411762306a36Sopenharmony_ci struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 411862306a36Sopenharmony_ci 411962306a36Sopenharmony_ci new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 412062306a36Sopenharmony_ci if (!new_disk_conf) { 412162306a36Sopenharmony_ci put_ldev(device); 412262306a36Sopenharmony_ci return -ENOMEM; 412362306a36Sopenharmony_ci } 412462306a36Sopenharmony_ci 412562306a36Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 412662306a36Sopenharmony_ci old_disk_conf = device->ldev->disk_conf; 412762306a36Sopenharmony_ci *new_disk_conf = *old_disk_conf; 412862306a36Sopenharmony_ci new_disk_conf->disk_size = p_usize; 412962306a36Sopenharmony_ci 413062306a36Sopenharmony_ci rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 413162306a36Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 413262306a36Sopenharmony_ci kvfree_rcu_mightsleep(old_disk_conf); 413362306a36Sopenharmony_ci 413462306a36Sopenharmony_ci drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n", 413562306a36Sopenharmony_ci (unsigned long)p_usize, (unsigned long)my_usize); 413662306a36Sopenharmony_ci } 413762306a36Sopenharmony_ci 413862306a36Sopenharmony_ci put_ldev(device); 413962306a36Sopenharmony_ci } 414062306a36Sopenharmony_ci 414162306a36Sopenharmony_ci device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); 414262306a36Sopenharmony_ci /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size(). 
414362306a36Sopenharmony_ci In case we cleared the QUEUE_FLAG_DISCARD from our queue in 414462306a36Sopenharmony_ci drbd_reconsider_queue_parameters(), we can be sure that after 414562306a36Sopenharmony_ci drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ 414662306a36Sopenharmony_ci 414762306a36Sopenharmony_ci ddsf = be16_to_cpu(p->dds_flags); 414862306a36Sopenharmony_ci if (get_ldev(device)) { 414962306a36Sopenharmony_ci drbd_reconsider_queue_parameters(device, device->ldev, o); 415062306a36Sopenharmony_ci dd = drbd_determine_dev_size(device, ddsf, NULL); 415162306a36Sopenharmony_ci put_ldev(device); 415262306a36Sopenharmony_ci if (dd == DS_ERROR) 415362306a36Sopenharmony_ci return -EIO; 415462306a36Sopenharmony_ci drbd_md_sync(device); 415562306a36Sopenharmony_ci } else { 415662306a36Sopenharmony_ci /* 415762306a36Sopenharmony_ci * I am diskless, need to accept the peer's *current* size. 415862306a36Sopenharmony_ci * I must NOT accept the peers backing disk size, 415962306a36Sopenharmony_ci * it may have been larger than mine all along... 416062306a36Sopenharmony_ci * 416162306a36Sopenharmony_ci * At this point, the peer knows more about my disk, or at 416262306a36Sopenharmony_ci * least about what we last agreed upon, than myself. 416362306a36Sopenharmony_ci * So if his c_size is less than his d_size, the most likely 416462306a36Sopenharmony_ci * reason is that *my* d_size was smaller last time we checked. 416562306a36Sopenharmony_ci * 416662306a36Sopenharmony_ci * However, if he sends a zero current size, 416762306a36Sopenharmony_ci * take his (user-capped or) backing disk size anyways. 416862306a36Sopenharmony_ci * 416962306a36Sopenharmony_ci * Unless of course he does not have a disk himself. 417062306a36Sopenharmony_ci * In which case we ignore this completely. 
417162306a36Sopenharmony_ci */ 417262306a36Sopenharmony_ci sector_t new_size = p_csize ?: p_usize ?: p_size; 417362306a36Sopenharmony_ci drbd_reconsider_queue_parameters(device, NULL, o); 417462306a36Sopenharmony_ci if (new_size == 0) { 417562306a36Sopenharmony_ci /* Ignore, peer does not know nothing. */ 417662306a36Sopenharmony_ci } else if (new_size == cur_size) { 417762306a36Sopenharmony_ci /* nothing to do */ 417862306a36Sopenharmony_ci } else if (cur_size != 0 && p_size == 0) { 417962306a36Sopenharmony_ci drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n", 418062306a36Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 418162306a36Sopenharmony_ci } else if (new_size < cur_size && device->state.role == R_PRIMARY) { 418262306a36Sopenharmony_ci drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n", 418362306a36Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 418462306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 418562306a36Sopenharmony_ci return -EIO; 418662306a36Sopenharmony_ci } else { 418762306a36Sopenharmony_ci /* I believe the peer, if 418862306a36Sopenharmony_ci * - I don't have a current size myself 418962306a36Sopenharmony_ci * - we agree on the size anyways 419062306a36Sopenharmony_ci * - I do have a current size, am Secondary, 419162306a36Sopenharmony_ci * and he has the only disk 419262306a36Sopenharmony_ci * - I do have a current size, am Primary, 419362306a36Sopenharmony_ci * and he has the only disk, 419462306a36Sopenharmony_ci * which is larger than my current size 419562306a36Sopenharmony_ci */ 419662306a36Sopenharmony_ci drbd_set_my_capacity(device, new_size); 419762306a36Sopenharmony_ci } 419862306a36Sopenharmony_ci } 419962306a36Sopenharmony_ci 420062306a36Sopenharmony_ci if (get_ldev(device)) { 420162306a36Sopenharmony_ci if (device->ldev->known_size != 
drbd_get_capacity(device->ldev->backing_bdev)) { 420262306a36Sopenharmony_ci device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 420362306a36Sopenharmony_ci ldsc = 1; 420462306a36Sopenharmony_ci } 420562306a36Sopenharmony_ci 420662306a36Sopenharmony_ci put_ldev(device); 420762306a36Sopenharmony_ci } 420862306a36Sopenharmony_ci 420962306a36Sopenharmony_ci if (device->state.conn > C_WF_REPORT_PARAMS) { 421062306a36Sopenharmony_ci if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) || 421162306a36Sopenharmony_ci ldsc) { 421262306a36Sopenharmony_ci /* we have different sizes, probably peer 421362306a36Sopenharmony_ci * needs to know my new size... */ 421462306a36Sopenharmony_ci drbd_send_sizes(peer_device, 0, ddsf); 421562306a36Sopenharmony_ci } 421662306a36Sopenharmony_ci if (test_and_clear_bit(RESIZE_PENDING, &device->flags) || 421762306a36Sopenharmony_ci (dd == DS_GREW && device->state.conn == C_CONNECTED)) { 421862306a36Sopenharmony_ci if (device->state.pdsk >= D_INCONSISTENT && 421962306a36Sopenharmony_ci device->state.disk >= D_INCONSISTENT) { 422062306a36Sopenharmony_ci if (ddsf & DDSF_NO_RESYNC) 422162306a36Sopenharmony_ci drbd_info(device, "Resync of new storage suppressed with --assume-clean\n"); 422262306a36Sopenharmony_ci else 422362306a36Sopenharmony_ci resync_after_online_grow(device); 422462306a36Sopenharmony_ci } else 422562306a36Sopenharmony_ci set_bit(RESYNC_AFTER_NEG, &device->flags); 422662306a36Sopenharmony_ci } 422762306a36Sopenharmony_ci } 422862306a36Sopenharmony_ci 422962306a36Sopenharmony_ci return 0; 423062306a36Sopenharmony_ci} 423162306a36Sopenharmony_ci 423262306a36Sopenharmony_cistatic int receive_uuids(struct drbd_connection *connection, struct packet_info *pi) 423362306a36Sopenharmony_ci{ 423462306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 423562306a36Sopenharmony_ci struct drbd_device *device; 423662306a36Sopenharmony_ci struct p_uuids *p = pi->data; 423762306a36Sopenharmony_ci u64 *p_uuid; 
423862306a36Sopenharmony_ci int i, updated_uuids = 0; 423962306a36Sopenharmony_ci 424062306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 424162306a36Sopenharmony_ci if (!peer_device) 424262306a36Sopenharmony_ci return config_unknown_volume(connection, pi); 424362306a36Sopenharmony_ci device = peer_device->device; 424462306a36Sopenharmony_ci 424562306a36Sopenharmony_ci p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); 424662306a36Sopenharmony_ci if (!p_uuid) 424762306a36Sopenharmony_ci return false; 424862306a36Sopenharmony_ci 424962306a36Sopenharmony_ci for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 425062306a36Sopenharmony_ci p_uuid[i] = be64_to_cpu(p->uuid[i]); 425162306a36Sopenharmony_ci 425262306a36Sopenharmony_ci kfree(device->p_uuid); 425362306a36Sopenharmony_ci device->p_uuid = p_uuid; 425462306a36Sopenharmony_ci 425562306a36Sopenharmony_ci if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && 425662306a36Sopenharmony_ci device->state.disk < D_INCONSISTENT && 425762306a36Sopenharmony_ci device->state.role == R_PRIMARY && 425862306a36Sopenharmony_ci (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 425962306a36Sopenharmony_ci drbd_err(device, "Can only connect to data with current UUID=%016llX\n", 426062306a36Sopenharmony_ci (unsigned long long)device->ed_uuid); 426162306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 426262306a36Sopenharmony_ci return -EIO; 426362306a36Sopenharmony_ci } 426462306a36Sopenharmony_ci 426562306a36Sopenharmony_ci if (get_ldev(device)) { 426662306a36Sopenharmony_ci int skip_initial_sync = 426762306a36Sopenharmony_ci device->state.conn == C_CONNECTED && 426862306a36Sopenharmony_ci peer_device->connection->agreed_pro_version >= 90 && 426962306a36Sopenharmony_ci device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 427062306a36Sopenharmony_ci (p_uuid[UI_FLAGS] & 8); 
427162306a36Sopenharmony_ci if (skip_initial_sync) { 427262306a36Sopenharmony_ci drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); 427362306a36Sopenharmony_ci drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 427462306a36Sopenharmony_ci "clear_n_write from receive_uuids", 427562306a36Sopenharmony_ci BM_LOCKED_TEST_ALLOWED, NULL); 427662306a36Sopenharmony_ci _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); 427762306a36Sopenharmony_ci _drbd_uuid_set(device, UI_BITMAP, 0); 427862306a36Sopenharmony_ci _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 427962306a36Sopenharmony_ci CS_VERBOSE, NULL); 428062306a36Sopenharmony_ci drbd_md_sync(device); 428162306a36Sopenharmony_ci updated_uuids = 1; 428262306a36Sopenharmony_ci } 428362306a36Sopenharmony_ci put_ldev(device); 428462306a36Sopenharmony_ci } else if (device->state.disk < D_INCONSISTENT && 428562306a36Sopenharmony_ci device->state.role == R_PRIMARY) { 428662306a36Sopenharmony_ci /* I am a diskless primary, the peer just created a new current UUID 428762306a36Sopenharmony_ci for me. */ 428862306a36Sopenharmony_ci updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 428962306a36Sopenharmony_ci } 429062306a36Sopenharmony_ci 429162306a36Sopenharmony_ci /* Before we test for the disk state, we should wait until an eventually 429262306a36Sopenharmony_ci ongoing cluster wide state change is finished. That is important if 429362306a36Sopenharmony_ci we are primary and are detaching from our disk. We need to see the 429462306a36Sopenharmony_ci new disk state... 
*/ 429562306a36Sopenharmony_ci mutex_lock(device->state_mutex); 429662306a36Sopenharmony_ci mutex_unlock(device->state_mutex); 429762306a36Sopenharmony_ci if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) 429862306a36Sopenharmony_ci updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 429962306a36Sopenharmony_ci 430062306a36Sopenharmony_ci if (updated_uuids) 430162306a36Sopenharmony_ci drbd_print_uuids(device, "receiver updated UUIDs to"); 430262306a36Sopenharmony_ci 430362306a36Sopenharmony_ci return 0; 430462306a36Sopenharmony_ci} 430562306a36Sopenharmony_ci 430662306a36Sopenharmony_ci/** 430762306a36Sopenharmony_ci * convert_state() - Converts the peer's view of the cluster state to our point of view 430862306a36Sopenharmony_ci * @ps: The state as seen by the peer. 430962306a36Sopenharmony_ci */ 431062306a36Sopenharmony_cistatic union drbd_state convert_state(union drbd_state ps) 431162306a36Sopenharmony_ci{ 431262306a36Sopenharmony_ci union drbd_state ms; 431362306a36Sopenharmony_ci 431462306a36Sopenharmony_ci static enum drbd_conns c_tab[] = { 431562306a36Sopenharmony_ci [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, 431662306a36Sopenharmony_ci [C_CONNECTED] = C_CONNECTED, 431762306a36Sopenharmony_ci 431862306a36Sopenharmony_ci [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 431962306a36Sopenharmony_ci [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, 432062306a36Sopenharmony_ci [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ 432162306a36Sopenharmony_ci [C_VERIFY_S] = C_VERIFY_T, 432262306a36Sopenharmony_ci [C_MASK] = C_MASK, 432362306a36Sopenharmony_ci }; 432462306a36Sopenharmony_ci 432562306a36Sopenharmony_ci ms.i = ps.i; 432662306a36Sopenharmony_ci 432762306a36Sopenharmony_ci ms.conn = c_tab[ps.conn]; 432862306a36Sopenharmony_ci ms.peer = ps.role; 432962306a36Sopenharmony_ci ms.role = ps.peer; 433062306a36Sopenharmony_ci ms.pdsk = ps.disk; 433162306a36Sopenharmony_ci ms.disk = ps.pdsk; 433262306a36Sopenharmony_ci ms.peer_isp = 
(ps.aftr_isp | ps.user_isp); 433362306a36Sopenharmony_ci 433462306a36Sopenharmony_ci return ms; 433562306a36Sopenharmony_ci} 433662306a36Sopenharmony_ci 433762306a36Sopenharmony_cistatic int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) 433862306a36Sopenharmony_ci{ 433962306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 434062306a36Sopenharmony_ci struct drbd_device *device; 434162306a36Sopenharmony_ci struct p_req_state *p = pi->data; 434262306a36Sopenharmony_ci union drbd_state mask, val; 434362306a36Sopenharmony_ci enum drbd_state_rv rv; 434462306a36Sopenharmony_ci 434562306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 434662306a36Sopenharmony_ci if (!peer_device) 434762306a36Sopenharmony_ci return -EIO; 434862306a36Sopenharmony_ci device = peer_device->device; 434962306a36Sopenharmony_ci 435062306a36Sopenharmony_ci mask.i = be32_to_cpu(p->mask); 435162306a36Sopenharmony_ci val.i = be32_to_cpu(p->val); 435262306a36Sopenharmony_ci 435362306a36Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && 435462306a36Sopenharmony_ci mutex_is_locked(device->state_mutex)) { 435562306a36Sopenharmony_ci drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); 435662306a36Sopenharmony_ci return 0; 435762306a36Sopenharmony_ci } 435862306a36Sopenharmony_ci 435962306a36Sopenharmony_ci mask = convert_state(mask); 436062306a36Sopenharmony_ci val = convert_state(val); 436162306a36Sopenharmony_ci 436262306a36Sopenharmony_ci rv = drbd_change_state(device, CS_VERBOSE, mask, val); 436362306a36Sopenharmony_ci drbd_send_sr_reply(peer_device, rv); 436462306a36Sopenharmony_ci 436562306a36Sopenharmony_ci drbd_md_sync(device); 436662306a36Sopenharmony_ci 436762306a36Sopenharmony_ci return 0; 436862306a36Sopenharmony_ci} 436962306a36Sopenharmony_ci 437062306a36Sopenharmony_cistatic int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi) 437162306a36Sopenharmony_ci{ 
437262306a36Sopenharmony_ci struct p_req_state *p = pi->data; 437362306a36Sopenharmony_ci union drbd_state mask, val; 437462306a36Sopenharmony_ci enum drbd_state_rv rv; 437562306a36Sopenharmony_ci 437662306a36Sopenharmony_ci mask.i = be32_to_cpu(p->mask); 437762306a36Sopenharmony_ci val.i = be32_to_cpu(p->val); 437862306a36Sopenharmony_ci 437962306a36Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &connection->flags) && 438062306a36Sopenharmony_ci mutex_is_locked(&connection->cstate_mutex)) { 438162306a36Sopenharmony_ci conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG); 438262306a36Sopenharmony_ci return 0; 438362306a36Sopenharmony_ci } 438462306a36Sopenharmony_ci 438562306a36Sopenharmony_ci mask = convert_state(mask); 438662306a36Sopenharmony_ci val = convert_state(val); 438762306a36Sopenharmony_ci 438862306a36Sopenharmony_ci rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); 438962306a36Sopenharmony_ci conn_send_sr_reply(connection, rv); 439062306a36Sopenharmony_ci 439162306a36Sopenharmony_ci return 0; 439262306a36Sopenharmony_ci} 439362306a36Sopenharmony_ci 439462306a36Sopenharmony_cistatic int receive_state(struct drbd_connection *connection, struct packet_info *pi) 439562306a36Sopenharmony_ci{ 439662306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 439762306a36Sopenharmony_ci struct drbd_device *device; 439862306a36Sopenharmony_ci struct p_state *p = pi->data; 439962306a36Sopenharmony_ci union drbd_state os, ns, peer_state; 440062306a36Sopenharmony_ci enum drbd_disk_state real_peer_disk; 440162306a36Sopenharmony_ci enum chg_state_flags cs_flags; 440262306a36Sopenharmony_ci int rv; 440362306a36Sopenharmony_ci 440462306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 440562306a36Sopenharmony_ci if (!peer_device) 440662306a36Sopenharmony_ci return config_unknown_volume(connection, pi); 440762306a36Sopenharmony_ci device = peer_device->device; 440862306a36Sopenharmony_ci 
440962306a36Sopenharmony_ci peer_state.i = be32_to_cpu(p->state); 441062306a36Sopenharmony_ci 441162306a36Sopenharmony_ci real_peer_disk = peer_state.disk; 441262306a36Sopenharmony_ci if (peer_state.disk == D_NEGOTIATING) { 441362306a36Sopenharmony_ci real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; 441462306a36Sopenharmony_ci drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); 441562306a36Sopenharmony_ci } 441662306a36Sopenharmony_ci 441762306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 441862306a36Sopenharmony_ci retry: 441962306a36Sopenharmony_ci os = ns = drbd_read_state(device); 442062306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 442162306a36Sopenharmony_ci 442262306a36Sopenharmony_ci /* If some other part of the code (ack_receiver thread, timeout) 442362306a36Sopenharmony_ci * already decided to close the connection again, 442462306a36Sopenharmony_ci * we must not "re-establish" it here. */ 442562306a36Sopenharmony_ci if (os.conn <= C_TEAR_DOWN) 442662306a36Sopenharmony_ci return -ECONNRESET; 442762306a36Sopenharmony_ci 442862306a36Sopenharmony_ci /* If this is the "end of sync" confirmation, usually the peer disk 442962306a36Sopenharmony_ci * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits 443062306a36Sopenharmony_ci * set) resync started in PausedSyncT, or if the timing of pause-/ 443162306a36Sopenharmony_ci * unpause-sync events has been "just right", the peer disk may 443262306a36Sopenharmony_ci * transition from D_CONSISTENT to D_UP_TO_DATE as well. 
443362306a36Sopenharmony_ci */ 443462306a36Sopenharmony_ci if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && 443562306a36Sopenharmony_ci real_peer_disk == D_UP_TO_DATE && 443662306a36Sopenharmony_ci os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { 443762306a36Sopenharmony_ci /* If we are (becoming) SyncSource, but peer is still in sync 443862306a36Sopenharmony_ci * preparation, ignore its uptodate-ness to avoid flapping, it 443962306a36Sopenharmony_ci * will change to inconsistent once the peer reaches active 444062306a36Sopenharmony_ci * syncing states. 444162306a36Sopenharmony_ci * It may have changed syncer-paused flags, however, so we 444262306a36Sopenharmony_ci * cannot ignore this completely. */ 444362306a36Sopenharmony_ci if (peer_state.conn > C_CONNECTED && 444462306a36Sopenharmony_ci peer_state.conn < C_SYNC_SOURCE) 444562306a36Sopenharmony_ci real_peer_disk = D_INCONSISTENT; 444662306a36Sopenharmony_ci 444762306a36Sopenharmony_ci /* if peer_state changes to connected at the same time, 444862306a36Sopenharmony_ci * it explicitly notifies us that it finished resync. 444962306a36Sopenharmony_ci * Maybe we should finish it up, too? */ 445062306a36Sopenharmony_ci else if (os.conn >= C_SYNC_SOURCE && 445162306a36Sopenharmony_ci peer_state.conn == C_CONNECTED) { 445262306a36Sopenharmony_ci if (drbd_bm_total_weight(device) <= device->rs_failed) 445362306a36Sopenharmony_ci drbd_resync_finished(peer_device); 445462306a36Sopenharmony_ci return 0; 445562306a36Sopenharmony_ci } 445662306a36Sopenharmony_ci } 445762306a36Sopenharmony_ci 445862306a36Sopenharmony_ci /* explicit verify finished notification, stop sector reached. 
*/ 445962306a36Sopenharmony_ci if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && 446062306a36Sopenharmony_ci peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { 446162306a36Sopenharmony_ci ov_out_of_sync_print(peer_device); 446262306a36Sopenharmony_ci drbd_resync_finished(peer_device); 446362306a36Sopenharmony_ci return 0; 446462306a36Sopenharmony_ci } 446562306a36Sopenharmony_ci 446662306a36Sopenharmony_ci /* peer says his disk is inconsistent, while we think it is uptodate, 446762306a36Sopenharmony_ci * and this happens while the peer still thinks we have a sync going on, 446862306a36Sopenharmony_ci * but we think we are already done with the sync. 446962306a36Sopenharmony_ci * We ignore this to avoid flapping pdsk. 447062306a36Sopenharmony_ci * This should not happen, if the peer is a recent version of drbd. */ 447162306a36Sopenharmony_ci if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT && 447262306a36Sopenharmony_ci os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE) 447362306a36Sopenharmony_ci real_peer_disk = D_UP_TO_DATE; 447462306a36Sopenharmony_ci 447562306a36Sopenharmony_ci if (ns.conn == C_WF_REPORT_PARAMS) 447662306a36Sopenharmony_ci ns.conn = C_CONNECTED; 447762306a36Sopenharmony_ci 447862306a36Sopenharmony_ci if (peer_state.conn == C_AHEAD) 447962306a36Sopenharmony_ci ns.conn = C_BEHIND; 448062306a36Sopenharmony_ci 448162306a36Sopenharmony_ci /* TODO: 448262306a36Sopenharmony_ci * if (primary and diskless and peer uuid != effective uuid) 448362306a36Sopenharmony_ci * abort attach on peer; 448462306a36Sopenharmony_ci * 448562306a36Sopenharmony_ci * If this node does not have good data, was already connected, but 448662306a36Sopenharmony_ci * the peer did a late attach only now, trying to "negotiate" with me, 448762306a36Sopenharmony_ci * AND I am currently Primary, possibly frozen, with some specific 448862306a36Sopenharmony_ci * "effective" uuid, this should never be reached, really, because 
448962306a36Sopenharmony_ci * we first send the uuids, then the current state. 449062306a36Sopenharmony_ci * 449162306a36Sopenharmony_ci * In this scenario, we already dropped the connection hard 449262306a36Sopenharmony_ci * when we received the unsuitable uuids (receive_uuids(). 449362306a36Sopenharmony_ci * 449462306a36Sopenharmony_ci * Should we want to change this, that is: not drop the connection in 449562306a36Sopenharmony_ci * receive_uuids() already, then we would need to add a branch here 449662306a36Sopenharmony_ci * that aborts the attach of "unsuitable uuids" on the peer in case 449762306a36Sopenharmony_ci * this node is currently Diskless Primary. 449862306a36Sopenharmony_ci */ 449962306a36Sopenharmony_ci 450062306a36Sopenharmony_ci if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && 450162306a36Sopenharmony_ci get_ldev_if_state(device, D_NEGOTIATING)) { 450262306a36Sopenharmony_ci int cr; /* consider resync */ 450362306a36Sopenharmony_ci 450462306a36Sopenharmony_ci /* if we established a new connection */ 450562306a36Sopenharmony_ci cr = (os.conn < C_CONNECTED); 450662306a36Sopenharmony_ci /* if we had an established connection 450762306a36Sopenharmony_ci * and one of the nodes newly attaches a disk */ 450862306a36Sopenharmony_ci cr |= (os.conn == C_CONNECTED && 450962306a36Sopenharmony_ci (peer_state.disk == D_NEGOTIATING || 451062306a36Sopenharmony_ci os.disk == D_NEGOTIATING)); 451162306a36Sopenharmony_ci /* if we have both been inconsistent, and the peer has been 451262306a36Sopenharmony_ci * forced to be UpToDate with --force */ 451362306a36Sopenharmony_ci cr |= test_bit(CONSIDER_RESYNC, &device->flags); 451462306a36Sopenharmony_ci /* if we had been plain connected, and the admin requested to 451562306a36Sopenharmony_ci * start a sync by "invalidate" or "invalidate-remote" */ 451662306a36Sopenharmony_ci cr |= (os.conn == C_CONNECTED && 451762306a36Sopenharmony_ci (peer_state.conn >= C_STARTING_SYNC_S && 451862306a36Sopenharmony_ci 
peer_state.conn <= C_WF_BITMAP_T)); 451962306a36Sopenharmony_ci 452062306a36Sopenharmony_ci if (cr) 452162306a36Sopenharmony_ci ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk); 452262306a36Sopenharmony_ci 452362306a36Sopenharmony_ci put_ldev(device); 452462306a36Sopenharmony_ci if (ns.conn == C_MASK) { 452562306a36Sopenharmony_ci ns.conn = C_CONNECTED; 452662306a36Sopenharmony_ci if (device->state.disk == D_NEGOTIATING) { 452762306a36Sopenharmony_ci drbd_force_state(device, NS(disk, D_FAILED)); 452862306a36Sopenharmony_ci } else if (peer_state.disk == D_NEGOTIATING) { 452962306a36Sopenharmony_ci drbd_err(device, "Disk attach process on the peer node was aborted.\n"); 453062306a36Sopenharmony_ci peer_state.disk = D_DISKLESS; 453162306a36Sopenharmony_ci real_peer_disk = D_DISKLESS; 453262306a36Sopenharmony_ci } else { 453362306a36Sopenharmony_ci if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags)) 453462306a36Sopenharmony_ci return -EIO; 453562306a36Sopenharmony_ci D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS); 453662306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 453762306a36Sopenharmony_ci return -EIO; 453862306a36Sopenharmony_ci } 453962306a36Sopenharmony_ci } 454062306a36Sopenharmony_ci } 454162306a36Sopenharmony_ci 454262306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 454362306a36Sopenharmony_ci if (os.i != drbd_read_state(device).i) 454462306a36Sopenharmony_ci goto retry; 454562306a36Sopenharmony_ci clear_bit(CONSIDER_RESYNC, &device->flags); 454662306a36Sopenharmony_ci ns.peer = peer_state.role; 454762306a36Sopenharmony_ci ns.pdsk = real_peer_disk; 454862306a36Sopenharmony_ci ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); 454962306a36Sopenharmony_ci if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) 455062306a36Sopenharmony_ci ns.disk = device->new_state_tmp.disk; 455162306a36Sopenharmony_ci 
cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); 455262306a36Sopenharmony_ci if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && 455362306a36Sopenharmony_ci test_bit(NEW_CUR_UUID, &device->flags)) { 455462306a36Sopenharmony_ci /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this 455562306a36Sopenharmony_ci for temporal network outages! */ 455662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 455762306a36Sopenharmony_ci drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); 455862306a36Sopenharmony_ci tl_clear(peer_device->connection); 455962306a36Sopenharmony_ci drbd_uuid_new_current(device); 456062306a36Sopenharmony_ci clear_bit(NEW_CUR_UUID, &device->flags); 456162306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); 456262306a36Sopenharmony_ci return -EIO; 456362306a36Sopenharmony_ci } 456462306a36Sopenharmony_ci rv = _drbd_set_state(device, ns, cs_flags, NULL); 456562306a36Sopenharmony_ci ns = drbd_read_state(device); 456662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 456762306a36Sopenharmony_ci 456862306a36Sopenharmony_ci if (rv < SS_SUCCESS) { 456962306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 457062306a36Sopenharmony_ci return -EIO; 457162306a36Sopenharmony_ci } 457262306a36Sopenharmony_ci 457362306a36Sopenharmony_ci if (os.conn > C_WF_REPORT_PARAMS) { 457462306a36Sopenharmony_ci if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED && 457562306a36Sopenharmony_ci peer_state.disk != D_NEGOTIATING ) { 457662306a36Sopenharmony_ci /* we want resync, peer has not yet decided to sync... 
*/ 457762306a36Sopenharmony_ci /* Nowadays only used when forcing a node into primary role and 457862306a36Sopenharmony_ci setting its disk to UpToDate with that */ 457962306a36Sopenharmony_ci drbd_send_uuids(peer_device); 458062306a36Sopenharmony_ci drbd_send_current_state(peer_device); 458162306a36Sopenharmony_ci } 458262306a36Sopenharmony_ci } 458362306a36Sopenharmony_ci 458462306a36Sopenharmony_ci clear_bit(DISCARD_MY_DATA, &device->flags); 458562306a36Sopenharmony_ci 458662306a36Sopenharmony_ci drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */ 458762306a36Sopenharmony_ci 458862306a36Sopenharmony_ci return 0; 458962306a36Sopenharmony_ci} 459062306a36Sopenharmony_ci 459162306a36Sopenharmony_cistatic int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi) 459262306a36Sopenharmony_ci{ 459362306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 459462306a36Sopenharmony_ci struct drbd_device *device; 459562306a36Sopenharmony_ci struct p_rs_uuid *p = pi->data; 459662306a36Sopenharmony_ci 459762306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 459862306a36Sopenharmony_ci if (!peer_device) 459962306a36Sopenharmony_ci return -EIO; 460062306a36Sopenharmony_ci device = peer_device->device; 460162306a36Sopenharmony_ci 460262306a36Sopenharmony_ci wait_event(device->misc_wait, 460362306a36Sopenharmony_ci device->state.conn == C_WF_SYNC_UUID || 460462306a36Sopenharmony_ci device->state.conn == C_BEHIND || 460562306a36Sopenharmony_ci device->state.conn < C_CONNECTED || 460662306a36Sopenharmony_ci device->state.disk < D_NEGOTIATING); 460762306a36Sopenharmony_ci 460862306a36Sopenharmony_ci /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */ 460962306a36Sopenharmony_ci 461062306a36Sopenharmony_ci /* Here the _drbd_uuid_ functions are right, current should 461162306a36Sopenharmony_ci _not_ be rotated into the history */ 461262306a36Sopenharmony_ci if (get_ldev_if_state(device, 
D_NEGOTIATING)) { 461362306a36Sopenharmony_ci _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid)); 461462306a36Sopenharmony_ci _drbd_uuid_set(device, UI_BITMAP, 0UL); 461562306a36Sopenharmony_ci 461662306a36Sopenharmony_ci drbd_print_uuids(device, "updated sync uuid"); 461762306a36Sopenharmony_ci drbd_start_resync(device, C_SYNC_TARGET); 461862306a36Sopenharmony_ci 461962306a36Sopenharmony_ci put_ldev(device); 462062306a36Sopenharmony_ci } else 462162306a36Sopenharmony_ci drbd_err(device, "Ignoring SyncUUID packet!\n"); 462262306a36Sopenharmony_ci 462362306a36Sopenharmony_ci return 0; 462462306a36Sopenharmony_ci} 462562306a36Sopenharmony_ci 462662306a36Sopenharmony_ci/* 462762306a36Sopenharmony_ci * receive_bitmap_plain 462862306a36Sopenharmony_ci * 462962306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 463062306a36Sopenharmony_ci * code upon failure. 463162306a36Sopenharmony_ci */ 463262306a36Sopenharmony_cistatic int 463362306a36Sopenharmony_cireceive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size, 463462306a36Sopenharmony_ci unsigned long *p, struct bm_xfer_ctx *c) 463562306a36Sopenharmony_ci{ 463662306a36Sopenharmony_ci unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - 463762306a36Sopenharmony_ci drbd_header_size(peer_device->connection); 463862306a36Sopenharmony_ci unsigned int num_words = min_t(size_t, data_size / sizeof(*p), 463962306a36Sopenharmony_ci c->bm_words - c->word_offset); 464062306a36Sopenharmony_ci unsigned int want = num_words * sizeof(*p); 464162306a36Sopenharmony_ci int err; 464262306a36Sopenharmony_ci 464362306a36Sopenharmony_ci if (want != size) { 464462306a36Sopenharmony_ci drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size); 464562306a36Sopenharmony_ci return -EIO; 464662306a36Sopenharmony_ci } 464762306a36Sopenharmony_ci if (want == 0) 464862306a36Sopenharmony_ci return 0; 464962306a36Sopenharmony_ci err = 
drbd_recv_all(peer_device->connection, p, want); 465062306a36Sopenharmony_ci if (err) 465162306a36Sopenharmony_ci return err; 465262306a36Sopenharmony_ci 465362306a36Sopenharmony_ci drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p); 465462306a36Sopenharmony_ci 465562306a36Sopenharmony_ci c->word_offset += num_words; 465662306a36Sopenharmony_ci c->bit_offset = c->word_offset * BITS_PER_LONG; 465762306a36Sopenharmony_ci if (c->bit_offset > c->bm_bits) 465862306a36Sopenharmony_ci c->bit_offset = c->bm_bits; 465962306a36Sopenharmony_ci 466062306a36Sopenharmony_ci return 1; 466162306a36Sopenharmony_ci} 466262306a36Sopenharmony_ci 466362306a36Sopenharmony_cistatic enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p) 466462306a36Sopenharmony_ci{ 466562306a36Sopenharmony_ci return (enum drbd_bitmap_code)(p->encoding & 0x0f); 466662306a36Sopenharmony_ci} 466762306a36Sopenharmony_ci 466862306a36Sopenharmony_cistatic int dcbp_get_start(struct p_compressed_bm *p) 466962306a36Sopenharmony_ci{ 467062306a36Sopenharmony_ci return (p->encoding & 0x80) != 0; 467162306a36Sopenharmony_ci} 467262306a36Sopenharmony_ci 467362306a36Sopenharmony_cistatic int dcbp_get_pad_bits(struct p_compressed_bm *p) 467462306a36Sopenharmony_ci{ 467562306a36Sopenharmony_ci return (p->encoding >> 4) & 0x7; 467662306a36Sopenharmony_ci} 467762306a36Sopenharmony_ci 467862306a36Sopenharmony_ci/* 467962306a36Sopenharmony_ci * recv_bm_rle_bits 468062306a36Sopenharmony_ci * 468162306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 468262306a36Sopenharmony_ci * code upon failure. 
468362306a36Sopenharmony_ci */ 468462306a36Sopenharmony_cistatic int 468562306a36Sopenharmony_cirecv_bm_rle_bits(struct drbd_peer_device *peer_device, 468662306a36Sopenharmony_ci struct p_compressed_bm *p, 468762306a36Sopenharmony_ci struct bm_xfer_ctx *c, 468862306a36Sopenharmony_ci unsigned int len) 468962306a36Sopenharmony_ci{ 469062306a36Sopenharmony_ci struct bitstream bs; 469162306a36Sopenharmony_ci u64 look_ahead; 469262306a36Sopenharmony_ci u64 rl; 469362306a36Sopenharmony_ci u64 tmp; 469462306a36Sopenharmony_ci unsigned long s = c->bit_offset; 469562306a36Sopenharmony_ci unsigned long e; 469662306a36Sopenharmony_ci int toggle = dcbp_get_start(p); 469762306a36Sopenharmony_ci int have; 469862306a36Sopenharmony_ci int bits; 469962306a36Sopenharmony_ci 470062306a36Sopenharmony_ci bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p)); 470162306a36Sopenharmony_ci 470262306a36Sopenharmony_ci bits = bitstream_get_bits(&bs, &look_ahead, 64); 470362306a36Sopenharmony_ci if (bits < 0) 470462306a36Sopenharmony_ci return -EIO; 470562306a36Sopenharmony_ci 470662306a36Sopenharmony_ci for (have = bits; have > 0; s += rl, toggle = !toggle) { 470762306a36Sopenharmony_ci bits = vli_decode_bits(&rl, look_ahead); 470862306a36Sopenharmony_ci if (bits <= 0) 470962306a36Sopenharmony_ci return -EIO; 471062306a36Sopenharmony_ci 471162306a36Sopenharmony_ci if (toggle) { 471262306a36Sopenharmony_ci e = s + rl -1; 471362306a36Sopenharmony_ci if (e >= c->bm_bits) { 471462306a36Sopenharmony_ci drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); 471562306a36Sopenharmony_ci return -EIO; 471662306a36Sopenharmony_ci } 471762306a36Sopenharmony_ci _drbd_bm_set_bits(peer_device->device, s, e); 471862306a36Sopenharmony_ci } 471962306a36Sopenharmony_ci 472062306a36Sopenharmony_ci if (have < bits) { 472162306a36Sopenharmony_ci drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", 472262306a36Sopenharmony_ci have, bits, look_ahead, 
472362306a36Sopenharmony_ci (unsigned int)(bs.cur.b - p->code), 472462306a36Sopenharmony_ci (unsigned int)bs.buf_len); 472562306a36Sopenharmony_ci return -EIO; 472662306a36Sopenharmony_ci } 472762306a36Sopenharmony_ci /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */ 472862306a36Sopenharmony_ci if (likely(bits < 64)) 472962306a36Sopenharmony_ci look_ahead >>= bits; 473062306a36Sopenharmony_ci else 473162306a36Sopenharmony_ci look_ahead = 0; 473262306a36Sopenharmony_ci have -= bits; 473362306a36Sopenharmony_ci 473462306a36Sopenharmony_ci bits = bitstream_get_bits(&bs, &tmp, 64 - have); 473562306a36Sopenharmony_ci if (bits < 0) 473662306a36Sopenharmony_ci return -EIO; 473762306a36Sopenharmony_ci look_ahead |= tmp << have; 473862306a36Sopenharmony_ci have += bits; 473962306a36Sopenharmony_ci } 474062306a36Sopenharmony_ci 474162306a36Sopenharmony_ci c->bit_offset = s; 474262306a36Sopenharmony_ci bm_xfer_ctx_bit_to_word_offset(c); 474362306a36Sopenharmony_ci 474462306a36Sopenharmony_ci return (s != c->bm_bits); 474562306a36Sopenharmony_ci} 474662306a36Sopenharmony_ci 474762306a36Sopenharmony_ci/* 474862306a36Sopenharmony_ci * decode_bitmap_c 474962306a36Sopenharmony_ci * 475062306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 475162306a36Sopenharmony_ci * code upon failure. 
475262306a36Sopenharmony_ci */ 475362306a36Sopenharmony_cistatic int 475462306a36Sopenharmony_cidecode_bitmap_c(struct drbd_peer_device *peer_device, 475562306a36Sopenharmony_ci struct p_compressed_bm *p, 475662306a36Sopenharmony_ci struct bm_xfer_ctx *c, 475762306a36Sopenharmony_ci unsigned int len) 475862306a36Sopenharmony_ci{ 475962306a36Sopenharmony_ci if (dcbp_get_code(p) == RLE_VLI_Bits) 476062306a36Sopenharmony_ci return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p)); 476162306a36Sopenharmony_ci 476262306a36Sopenharmony_ci /* other variants had been implemented for evaluation, 476362306a36Sopenharmony_ci * but have been dropped as this one turned out to be "best" 476462306a36Sopenharmony_ci * during all our tests. */ 476562306a36Sopenharmony_ci 476662306a36Sopenharmony_ci drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding); 476762306a36Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 476862306a36Sopenharmony_ci return -EIO; 476962306a36Sopenharmony_ci} 477062306a36Sopenharmony_ci 477162306a36Sopenharmony_civoid INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, 477262306a36Sopenharmony_ci const char *direction, struct bm_xfer_ctx *c) 477362306a36Sopenharmony_ci{ 477462306a36Sopenharmony_ci /* what would it take to transfer it "plaintext" */ 477562306a36Sopenharmony_ci unsigned int header_size = drbd_header_size(peer_device->connection); 477662306a36Sopenharmony_ci unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; 477762306a36Sopenharmony_ci unsigned int plain = 477862306a36Sopenharmony_ci header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + 477962306a36Sopenharmony_ci c->bm_words * sizeof(unsigned long); 478062306a36Sopenharmony_ci unsigned int total = c->bytes[0] + c->bytes[1]; 478162306a36Sopenharmony_ci unsigned int r; 478262306a36Sopenharmony_ci 478362306a36Sopenharmony_ci /* total can not be zero. 
but just in case: */ 478462306a36Sopenharmony_ci if (total == 0) 478562306a36Sopenharmony_ci return; 478662306a36Sopenharmony_ci 478762306a36Sopenharmony_ci /* don't report if not compressed */ 478862306a36Sopenharmony_ci if (total >= plain) 478962306a36Sopenharmony_ci return; 479062306a36Sopenharmony_ci 479162306a36Sopenharmony_ci /* total < plain. check for overflow, still */ 479262306a36Sopenharmony_ci r = (total > UINT_MAX/1000) ? (total / (plain/1000)) 479362306a36Sopenharmony_ci : (1000 * total / plain); 479462306a36Sopenharmony_ci 479562306a36Sopenharmony_ci if (r > 1000) 479662306a36Sopenharmony_ci r = 1000; 479762306a36Sopenharmony_ci 479862306a36Sopenharmony_ci r = 1000 - r; 479962306a36Sopenharmony_ci drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " 480062306a36Sopenharmony_ci "total %u; compression: %u.%u%%\n", 480162306a36Sopenharmony_ci direction, 480262306a36Sopenharmony_ci c->bytes[1], c->packets[1], 480362306a36Sopenharmony_ci c->bytes[0], c->packets[0], 480462306a36Sopenharmony_ci total, r/10, r % 10); 480562306a36Sopenharmony_ci} 480662306a36Sopenharmony_ci 480762306a36Sopenharmony_ci/* Since we are processing the bitfield from lower addresses to higher, 480862306a36Sopenharmony_ci it does not matter if the process it in 32 bit chunks or 64 bit 480962306a36Sopenharmony_ci chunks as long as it is little endian. (Understand it as byte stream, 481062306a36Sopenharmony_ci beginning with the lowest byte...) If we would use big endian 481162306a36Sopenharmony_ci we would need to process it from the highest address to the lowest, 481262306a36Sopenharmony_ci in order to be agnostic to the 32 vs 64 bits issue. 481362306a36Sopenharmony_ci 481462306a36Sopenharmony_ci returns 0 on failure, 1 if we successfully received it. 
*/ 481562306a36Sopenharmony_cistatic int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi) 481662306a36Sopenharmony_ci{ 481762306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 481862306a36Sopenharmony_ci struct drbd_device *device; 481962306a36Sopenharmony_ci struct bm_xfer_ctx c; 482062306a36Sopenharmony_ci int err; 482162306a36Sopenharmony_ci 482262306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 482362306a36Sopenharmony_ci if (!peer_device) 482462306a36Sopenharmony_ci return -EIO; 482562306a36Sopenharmony_ci device = peer_device->device; 482662306a36Sopenharmony_ci 482762306a36Sopenharmony_ci drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED); 482862306a36Sopenharmony_ci /* you are supposed to send additional out-of-sync information 482962306a36Sopenharmony_ci * if you actually set bits during this phase */ 483062306a36Sopenharmony_ci 483162306a36Sopenharmony_ci c = (struct bm_xfer_ctx) { 483262306a36Sopenharmony_ci .bm_bits = drbd_bm_bits(device), 483362306a36Sopenharmony_ci .bm_words = drbd_bm_words(device), 483462306a36Sopenharmony_ci }; 483562306a36Sopenharmony_ci 483662306a36Sopenharmony_ci for(;;) { 483762306a36Sopenharmony_ci if (pi->cmd == P_BITMAP) 483862306a36Sopenharmony_ci err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c); 483962306a36Sopenharmony_ci else if (pi->cmd == P_COMPRESSED_BITMAP) { 484062306a36Sopenharmony_ci /* MAYBE: sanity check that we speak proto >= 90, 484162306a36Sopenharmony_ci * and the feature is enabled! 
*/ 484262306a36Sopenharmony_ci struct p_compressed_bm *p = pi->data; 484362306a36Sopenharmony_ci 484462306a36Sopenharmony_ci if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) { 484562306a36Sopenharmony_ci drbd_err(device, "ReportCBitmap packet too large\n"); 484662306a36Sopenharmony_ci err = -EIO; 484762306a36Sopenharmony_ci goto out; 484862306a36Sopenharmony_ci } 484962306a36Sopenharmony_ci if (pi->size <= sizeof(*p)) { 485062306a36Sopenharmony_ci drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size); 485162306a36Sopenharmony_ci err = -EIO; 485262306a36Sopenharmony_ci goto out; 485362306a36Sopenharmony_ci } 485462306a36Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p, pi->size); 485562306a36Sopenharmony_ci if (err) 485662306a36Sopenharmony_ci goto out; 485762306a36Sopenharmony_ci err = decode_bitmap_c(peer_device, p, &c, pi->size); 485862306a36Sopenharmony_ci } else { 485962306a36Sopenharmony_ci drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); 486062306a36Sopenharmony_ci err = -EIO; 486162306a36Sopenharmony_ci goto out; 486262306a36Sopenharmony_ci } 486362306a36Sopenharmony_ci 486462306a36Sopenharmony_ci c.packets[pi->cmd == P_BITMAP]++; 486562306a36Sopenharmony_ci c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size; 486662306a36Sopenharmony_ci 486762306a36Sopenharmony_ci if (err <= 0) { 486862306a36Sopenharmony_ci if (err < 0) 486962306a36Sopenharmony_ci goto out; 487062306a36Sopenharmony_ci break; 487162306a36Sopenharmony_ci } 487262306a36Sopenharmony_ci err = drbd_recv_header(peer_device->connection, pi); 487362306a36Sopenharmony_ci if (err) 487462306a36Sopenharmony_ci goto out; 487562306a36Sopenharmony_ci } 487662306a36Sopenharmony_ci 487762306a36Sopenharmony_ci INFO_bm_xfer_stats(peer_device, "receive", &c); 487862306a36Sopenharmony_ci 487962306a36Sopenharmony_ci if (device->state.conn == C_WF_BITMAP_T) { 488062306a36Sopenharmony_ci enum 
drbd_state_rv rv; 488162306a36Sopenharmony_ci 488262306a36Sopenharmony_ci err = drbd_send_bitmap(device, peer_device); 488362306a36Sopenharmony_ci if (err) 488462306a36Sopenharmony_ci goto out; 488562306a36Sopenharmony_ci /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ 488662306a36Sopenharmony_ci rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); 488762306a36Sopenharmony_ci D_ASSERT(device, rv == SS_SUCCESS); 488862306a36Sopenharmony_ci } else if (device->state.conn != C_WF_BITMAP_S) { 488962306a36Sopenharmony_ci /* admin may have requested C_DISCONNECTING, 489062306a36Sopenharmony_ci * other threads may have noticed network errors */ 489162306a36Sopenharmony_ci drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n", 489262306a36Sopenharmony_ci drbd_conn_str(device->state.conn)); 489362306a36Sopenharmony_ci } 489462306a36Sopenharmony_ci err = 0; 489562306a36Sopenharmony_ci 489662306a36Sopenharmony_ci out: 489762306a36Sopenharmony_ci drbd_bm_unlock(device); 489862306a36Sopenharmony_ci if (!err && device->state.conn == C_WF_BITMAP_S) 489962306a36Sopenharmony_ci drbd_start_resync(device, C_SYNC_SOURCE); 490062306a36Sopenharmony_ci return err; 490162306a36Sopenharmony_ci} 490262306a36Sopenharmony_ci 490362306a36Sopenharmony_cistatic int receive_skip(struct drbd_connection *connection, struct packet_info *pi) 490462306a36Sopenharmony_ci{ 490562306a36Sopenharmony_ci drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n", 490662306a36Sopenharmony_ci pi->cmd, pi->size); 490762306a36Sopenharmony_ci 490862306a36Sopenharmony_ci return ignore_remaining_packet(connection, pi); 490962306a36Sopenharmony_ci} 491062306a36Sopenharmony_ci 491162306a36Sopenharmony_cistatic int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi) 491262306a36Sopenharmony_ci{ 491362306a36Sopenharmony_ci /* Make sure we've acked all the TCP data associated 491462306a36Sopenharmony_ci * with the data requests 
being unplugged */ 491562306a36Sopenharmony_ci tcp_sock_set_quickack(connection->data.socket->sk, 2); 491662306a36Sopenharmony_ci return 0; 491762306a36Sopenharmony_ci} 491862306a36Sopenharmony_ci 491962306a36Sopenharmony_cistatic int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi) 492062306a36Sopenharmony_ci{ 492162306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 492262306a36Sopenharmony_ci struct drbd_device *device; 492362306a36Sopenharmony_ci struct p_block_desc *p = pi->data; 492462306a36Sopenharmony_ci 492562306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 492662306a36Sopenharmony_ci if (!peer_device) 492762306a36Sopenharmony_ci return -EIO; 492862306a36Sopenharmony_ci device = peer_device->device; 492962306a36Sopenharmony_ci 493062306a36Sopenharmony_ci switch (device->state.conn) { 493162306a36Sopenharmony_ci case C_WF_SYNC_UUID: 493262306a36Sopenharmony_ci case C_WF_BITMAP_T: 493362306a36Sopenharmony_ci case C_BEHIND: 493462306a36Sopenharmony_ci break; 493562306a36Sopenharmony_ci default: 493662306a36Sopenharmony_ci drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", 493762306a36Sopenharmony_ci drbd_conn_str(device->state.conn)); 493862306a36Sopenharmony_ci } 493962306a36Sopenharmony_ci 494062306a36Sopenharmony_ci drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); 494162306a36Sopenharmony_ci 494262306a36Sopenharmony_ci return 0; 494362306a36Sopenharmony_ci} 494462306a36Sopenharmony_ci 494562306a36Sopenharmony_cistatic int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi) 494662306a36Sopenharmony_ci{ 494762306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 494862306a36Sopenharmony_ci struct p_block_desc *p = pi->data; 494962306a36Sopenharmony_ci struct drbd_device *device; 495062306a36Sopenharmony_ci sector_t sector; 495162306a36Sopenharmony_ci int size, err = 0; 
495262306a36Sopenharmony_ci 495362306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 495462306a36Sopenharmony_ci if (!peer_device) 495562306a36Sopenharmony_ci return -EIO; 495662306a36Sopenharmony_ci device = peer_device->device; 495762306a36Sopenharmony_ci 495862306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 495962306a36Sopenharmony_ci size = be32_to_cpu(p->blksize); 496062306a36Sopenharmony_ci 496162306a36Sopenharmony_ci dec_rs_pending(peer_device); 496262306a36Sopenharmony_ci 496362306a36Sopenharmony_ci if (get_ldev(device)) { 496462306a36Sopenharmony_ci struct drbd_peer_request *peer_req; 496562306a36Sopenharmony_ci 496662306a36Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, 496762306a36Sopenharmony_ci size, 0, GFP_NOIO); 496862306a36Sopenharmony_ci if (!peer_req) { 496962306a36Sopenharmony_ci put_ldev(device); 497062306a36Sopenharmony_ci return -ENOMEM; 497162306a36Sopenharmony_ci } 497262306a36Sopenharmony_ci 497362306a36Sopenharmony_ci peer_req->w.cb = e_end_resync_block; 497462306a36Sopenharmony_ci peer_req->opf = REQ_OP_DISCARD; 497562306a36Sopenharmony_ci peer_req->submit_jif = jiffies; 497662306a36Sopenharmony_ci peer_req->flags |= EE_TRIM; 497762306a36Sopenharmony_ci 497862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 497962306a36Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->sync_ee); 498062306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 498162306a36Sopenharmony_ci 498262306a36Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_ev); 498362306a36Sopenharmony_ci err = drbd_submit_peer_request(peer_req); 498462306a36Sopenharmony_ci 498562306a36Sopenharmony_ci if (err) { 498662306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 498762306a36Sopenharmony_ci list_del(&peer_req->w.list); 498862306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 498962306a36Sopenharmony_ci 499062306a36Sopenharmony_ci 
drbd_free_peer_req(device, peer_req); 499162306a36Sopenharmony_ci put_ldev(device); 499262306a36Sopenharmony_ci err = 0; 499362306a36Sopenharmony_ci goto fail; 499462306a36Sopenharmony_ci } 499562306a36Sopenharmony_ci 499662306a36Sopenharmony_ci inc_unacked(device); 499762306a36Sopenharmony_ci 499862306a36Sopenharmony_ci /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(), 499962306a36Sopenharmony_ci as well as drbd_rs_complete_io() */ 500062306a36Sopenharmony_ci } else { 500162306a36Sopenharmony_ci fail: 500262306a36Sopenharmony_ci drbd_rs_complete_io(device, sector); 500362306a36Sopenharmony_ci drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); 500462306a36Sopenharmony_ci } 500562306a36Sopenharmony_ci 500662306a36Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_in); 500762306a36Sopenharmony_ci 500862306a36Sopenharmony_ci return err; 500962306a36Sopenharmony_ci} 501062306a36Sopenharmony_ci 501162306a36Sopenharmony_cistruct data_cmd { 501262306a36Sopenharmony_ci int expect_payload; 501362306a36Sopenharmony_ci unsigned int pkt_size; 501462306a36Sopenharmony_ci int (*fn)(struct drbd_connection *, struct packet_info *); 501562306a36Sopenharmony_ci}; 501662306a36Sopenharmony_ci 501762306a36Sopenharmony_cistatic struct data_cmd drbd_cmd_handler[] = { 501862306a36Sopenharmony_ci [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, 501962306a36Sopenharmony_ci [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, 502062306a36Sopenharmony_ci [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , 502162306a36Sopenharmony_ci [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , 502262306a36Sopenharmony_ci [P_BITMAP] = { 1, 0, receive_bitmap } , 502362306a36Sopenharmony_ci [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } , 502462306a36Sopenharmony_ci [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote }, 502562306a36Sopenharmony_ci [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest 
}, 502662306a36Sopenharmony_ci [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 502762306a36Sopenharmony_ci [P_SYNC_PARAM] = { 1, 0, receive_SyncParam }, 502862306a36Sopenharmony_ci [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam }, 502962306a36Sopenharmony_ci [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, 503062306a36Sopenharmony_ci [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, 503162306a36Sopenharmony_ci [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, 503262306a36Sopenharmony_ci [P_STATE] = { 0, sizeof(struct p_state), receive_state }, 503362306a36Sopenharmony_ci [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, 503462306a36Sopenharmony_ci [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, 503562306a36Sopenharmony_ci [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 503662306a36Sopenharmony_ci [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 503762306a36Sopenharmony_ci [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 503862306a36Sopenharmony_ci [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 503962306a36Sopenharmony_ci [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, 504062306a36Sopenharmony_ci [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, 504162306a36Sopenharmony_ci [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, 504262306a36Sopenharmony_ci [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, 504362306a36Sopenharmony_ci [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, 504462306a36Sopenharmony_ci [P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data }, 504562306a36Sopenharmony_ci [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, 504662306a36Sopenharmony_ci}; 504762306a36Sopenharmony_ci 504862306a36Sopenharmony_cistatic 
void drbdd(struct drbd_connection *connection) 504962306a36Sopenharmony_ci{ 505062306a36Sopenharmony_ci struct packet_info pi; 505162306a36Sopenharmony_ci size_t shs; /* sub header size */ 505262306a36Sopenharmony_ci int err; 505362306a36Sopenharmony_ci 505462306a36Sopenharmony_ci while (get_t_state(&connection->receiver) == RUNNING) { 505562306a36Sopenharmony_ci struct data_cmd const *cmd; 505662306a36Sopenharmony_ci 505762306a36Sopenharmony_ci drbd_thread_current_set_cpu(&connection->receiver); 505862306a36Sopenharmony_ci update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug); 505962306a36Sopenharmony_ci if (drbd_recv_header_maybe_unplug(connection, &pi)) 506062306a36Sopenharmony_ci goto err_out; 506162306a36Sopenharmony_ci 506262306a36Sopenharmony_ci cmd = &drbd_cmd_handler[pi.cmd]; 506362306a36Sopenharmony_ci if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { 506462306a36Sopenharmony_ci drbd_err(connection, "Unexpected data packet %s (0x%04x)", 506562306a36Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 506662306a36Sopenharmony_ci goto err_out; 506762306a36Sopenharmony_ci } 506862306a36Sopenharmony_ci 506962306a36Sopenharmony_ci shs = cmd->pkt_size; 507062306a36Sopenharmony_ci if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME) 507162306a36Sopenharmony_ci shs += sizeof(struct o_qlim); 507262306a36Sopenharmony_ci if (pi.size > shs && !cmd->expect_payload) { 507362306a36Sopenharmony_ci drbd_err(connection, "No payload expected %s l:%d\n", 507462306a36Sopenharmony_ci cmdname(pi.cmd), pi.size); 507562306a36Sopenharmony_ci goto err_out; 507662306a36Sopenharmony_ci } 507762306a36Sopenharmony_ci if (pi.size < shs) { 507862306a36Sopenharmony_ci drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n", 507962306a36Sopenharmony_ci cmdname(pi.cmd), (int)shs, pi.size); 508062306a36Sopenharmony_ci goto err_out; 508162306a36Sopenharmony_ci } 508262306a36Sopenharmony_ci 508362306a36Sopenharmony_ci if (shs) { 
508462306a36Sopenharmony_ci update_receiver_timing_details(connection, drbd_recv_all_warn); 508562306a36Sopenharmony_ci err = drbd_recv_all_warn(connection, pi.data, shs); 508662306a36Sopenharmony_ci if (err) 508762306a36Sopenharmony_ci goto err_out; 508862306a36Sopenharmony_ci pi.size -= shs; 508962306a36Sopenharmony_ci } 509062306a36Sopenharmony_ci 509162306a36Sopenharmony_ci update_receiver_timing_details(connection, cmd->fn); 509262306a36Sopenharmony_ci err = cmd->fn(connection, &pi); 509362306a36Sopenharmony_ci if (err) { 509462306a36Sopenharmony_ci drbd_err(connection, "error receiving %s, e: %d l: %d!\n", 509562306a36Sopenharmony_ci cmdname(pi.cmd), err, pi.size); 509662306a36Sopenharmony_ci goto err_out; 509762306a36Sopenharmony_ci } 509862306a36Sopenharmony_ci } 509962306a36Sopenharmony_ci return; 510062306a36Sopenharmony_ci 510162306a36Sopenharmony_ci err_out: 510262306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 510362306a36Sopenharmony_ci} 510462306a36Sopenharmony_ci 510562306a36Sopenharmony_cistatic void conn_disconnect(struct drbd_connection *connection) 510662306a36Sopenharmony_ci{ 510762306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 510862306a36Sopenharmony_ci enum drbd_conns oc; 510962306a36Sopenharmony_ci int vnr; 511062306a36Sopenharmony_ci 511162306a36Sopenharmony_ci if (connection->cstate == C_STANDALONE) 511262306a36Sopenharmony_ci return; 511362306a36Sopenharmony_ci 511462306a36Sopenharmony_ci /* We are about to start the cleanup after connection loss. 511562306a36Sopenharmony_ci * Make sure drbd_make_request knows about that. 511662306a36Sopenharmony_ci * Usually we should be in some network failure state already, 511762306a36Sopenharmony_ci * but just in case we are not, we fix it up here. 
511862306a36Sopenharmony_ci */ 511962306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 512062306a36Sopenharmony_ci 512162306a36Sopenharmony_ci /* ack_receiver does not clean up anything. it must not interfere, either */ 512262306a36Sopenharmony_ci drbd_thread_stop(&connection->ack_receiver); 512362306a36Sopenharmony_ci if (connection->ack_sender) { 512462306a36Sopenharmony_ci destroy_workqueue(connection->ack_sender); 512562306a36Sopenharmony_ci connection->ack_sender = NULL; 512662306a36Sopenharmony_ci } 512762306a36Sopenharmony_ci drbd_free_sock(connection); 512862306a36Sopenharmony_ci 512962306a36Sopenharmony_ci rcu_read_lock(); 513062306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 513162306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 513262306a36Sopenharmony_ci kref_get(&device->kref); 513362306a36Sopenharmony_ci rcu_read_unlock(); 513462306a36Sopenharmony_ci drbd_disconnected(peer_device); 513562306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 513662306a36Sopenharmony_ci rcu_read_lock(); 513762306a36Sopenharmony_ci } 513862306a36Sopenharmony_ci rcu_read_unlock(); 513962306a36Sopenharmony_ci 514062306a36Sopenharmony_ci if (!list_empty(&connection->current_epoch->list)) 514162306a36Sopenharmony_ci drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n"); 514262306a36Sopenharmony_ci /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ 514362306a36Sopenharmony_ci atomic_set(&connection->current_epoch->epoch_size, 0); 514462306a36Sopenharmony_ci connection->send.seen_any_write_yet = false; 514562306a36Sopenharmony_ci 514662306a36Sopenharmony_ci drbd_info(connection, "Connection closed\n"); 514762306a36Sopenharmony_ci 514862306a36Sopenharmony_ci if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN) 514962306a36Sopenharmony_ci 
conn_try_outdate_peer_async(connection); 515062306a36Sopenharmony_ci 515162306a36Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 515262306a36Sopenharmony_ci oc = connection->cstate; 515362306a36Sopenharmony_ci if (oc >= C_UNCONNECTED) 515462306a36Sopenharmony_ci _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 515562306a36Sopenharmony_ci 515662306a36Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 515762306a36Sopenharmony_ci 515862306a36Sopenharmony_ci if (oc == C_DISCONNECTING) 515962306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); 516062306a36Sopenharmony_ci} 516162306a36Sopenharmony_ci 516262306a36Sopenharmony_cistatic int drbd_disconnected(struct drbd_peer_device *peer_device) 516362306a36Sopenharmony_ci{ 516462306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 516562306a36Sopenharmony_ci unsigned int i; 516662306a36Sopenharmony_ci 516762306a36Sopenharmony_ci /* wait for current activity to cease. */ 516862306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 516962306a36Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->active_ee); 517062306a36Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->sync_ee); 517162306a36Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->read_ee); 517262306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 517362306a36Sopenharmony_ci 517462306a36Sopenharmony_ci /* We do not have data structures that would allow us to 517562306a36Sopenharmony_ci * get the rs_pending_cnt down to 0 again. 517662306a36Sopenharmony_ci * * On C_SYNC_TARGET we do not have any data structures describing 517762306a36Sopenharmony_ci * the pending RSDataRequest's we have sent. 517862306a36Sopenharmony_ci * * On C_SYNC_SOURCE there is no data structure that tracks 517962306a36Sopenharmony_ci * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. 
518062306a36Sopenharmony_ci * And no, it is not the sum of the reference counts in the 518162306a36Sopenharmony_ci * resync_LRU. The resync_LRU tracks the whole operation including 518262306a36Sopenharmony_ci * the disk-IO, while the rs_pending_cnt only tracks the blocks 518362306a36Sopenharmony_ci * on the fly. */ 518462306a36Sopenharmony_ci drbd_rs_cancel_all(device); 518562306a36Sopenharmony_ci device->rs_total = 0; 518662306a36Sopenharmony_ci device->rs_failed = 0; 518762306a36Sopenharmony_ci atomic_set(&device->rs_pending_cnt, 0); 518862306a36Sopenharmony_ci wake_up(&device->misc_wait); 518962306a36Sopenharmony_ci 519062306a36Sopenharmony_ci del_timer_sync(&device->resync_timer); 519162306a36Sopenharmony_ci resync_timer_fn(&device->resync_timer); 519262306a36Sopenharmony_ci 519362306a36Sopenharmony_ci /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 519462306a36Sopenharmony_ci * w_make_resync_request etc. which may still be on the worker queue 519562306a36Sopenharmony_ci * to be "canceled" */ 519662306a36Sopenharmony_ci drbd_flush_workqueue(&peer_device->connection->sender_work); 519762306a36Sopenharmony_ci 519862306a36Sopenharmony_ci drbd_finish_peer_reqs(device); 519962306a36Sopenharmony_ci 520062306a36Sopenharmony_ci /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() 520162306a36Sopenharmony_ci might have issued a work again. The one before drbd_finish_peer_reqs() is 520262306a36Sopenharmony_ci necessary to reclain net_ee in drbd_finish_peer_reqs(). */ 520362306a36Sopenharmony_ci drbd_flush_workqueue(&peer_device->connection->sender_work); 520462306a36Sopenharmony_ci 520562306a36Sopenharmony_ci /* need to do it again, drbd_finish_peer_reqs() may have populated it 520662306a36Sopenharmony_ci * again via drbd_try_clear_on_disk_bm(). 
*/ 520762306a36Sopenharmony_ci drbd_rs_cancel_all(device); 520862306a36Sopenharmony_ci 520962306a36Sopenharmony_ci kfree(device->p_uuid); 521062306a36Sopenharmony_ci device->p_uuid = NULL; 521162306a36Sopenharmony_ci 521262306a36Sopenharmony_ci if (!drbd_suspended(device)) 521362306a36Sopenharmony_ci tl_clear(peer_device->connection); 521462306a36Sopenharmony_ci 521562306a36Sopenharmony_ci drbd_md_sync(device); 521662306a36Sopenharmony_ci 521762306a36Sopenharmony_ci if (get_ldev(device)) { 521862306a36Sopenharmony_ci drbd_bitmap_io(device, &drbd_bm_write_copy_pages, 521962306a36Sopenharmony_ci "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL); 522062306a36Sopenharmony_ci put_ldev(device); 522162306a36Sopenharmony_ci } 522262306a36Sopenharmony_ci 522362306a36Sopenharmony_ci /* tcp_close and release of sendpage pages can be deferred. I don't 522462306a36Sopenharmony_ci * want to use SO_LINGER, because apparently it can be deferred for 522562306a36Sopenharmony_ci * more than 20 seconds (longest time I checked). 522662306a36Sopenharmony_ci * 522762306a36Sopenharmony_ci * Actually we don't care for exactly when the network stack does its 522862306a36Sopenharmony_ci * put_page(), but release our reference on these pages right here. 
522962306a36Sopenharmony_ci */ 523062306a36Sopenharmony_ci i = drbd_free_peer_reqs(device, &device->net_ee); 523162306a36Sopenharmony_ci if (i) 523262306a36Sopenharmony_ci drbd_info(device, "net_ee not empty, killed %u entries\n", i); 523362306a36Sopenharmony_ci i = atomic_read(&device->pp_in_use_by_net); 523462306a36Sopenharmony_ci if (i) 523562306a36Sopenharmony_ci drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); 523662306a36Sopenharmony_ci i = atomic_read(&device->pp_in_use); 523762306a36Sopenharmony_ci if (i) 523862306a36Sopenharmony_ci drbd_info(device, "pp_in_use = %d, expected 0\n", i); 523962306a36Sopenharmony_ci 524062306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->read_ee)); 524162306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->active_ee)); 524262306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->sync_ee)); 524362306a36Sopenharmony_ci D_ASSERT(device, list_empty(&device->done_ee)); 524462306a36Sopenharmony_ci 524562306a36Sopenharmony_ci return 0; 524662306a36Sopenharmony_ci} 524762306a36Sopenharmony_ci 524862306a36Sopenharmony_ci/* 524962306a36Sopenharmony_ci * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version 525062306a36Sopenharmony_ci * we can agree on is stored in agreed_pro_version. 525162306a36Sopenharmony_ci * 525262306a36Sopenharmony_ci * feature flags and the reserved array should be enough room for future 525362306a36Sopenharmony_ci * enhancements of the handshake protocol, and possible plugins... 525462306a36Sopenharmony_ci * 525562306a36Sopenharmony_ci * for now, they are expected to be zero, but ignored. 
525662306a36Sopenharmony_ci */ 525762306a36Sopenharmony_cistatic int drbd_send_features(struct drbd_connection *connection) 525862306a36Sopenharmony_ci{ 525962306a36Sopenharmony_ci struct drbd_socket *sock; 526062306a36Sopenharmony_ci struct p_connection_features *p; 526162306a36Sopenharmony_ci 526262306a36Sopenharmony_ci sock = &connection->data; 526362306a36Sopenharmony_ci p = conn_prepare_command(connection, sock); 526462306a36Sopenharmony_ci if (!p) 526562306a36Sopenharmony_ci return -EIO; 526662306a36Sopenharmony_ci memset(p, 0, sizeof(*p)); 526762306a36Sopenharmony_ci p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 526862306a36Sopenharmony_ci p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 526962306a36Sopenharmony_ci p->feature_flags = cpu_to_be32(PRO_FEATURES); 527062306a36Sopenharmony_ci return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); 527162306a36Sopenharmony_ci} 527262306a36Sopenharmony_ci 527362306a36Sopenharmony_ci/* 527462306a36Sopenharmony_ci * return values: 527562306a36Sopenharmony_ci * 1 yes, we have a valid connection 527662306a36Sopenharmony_ci * 0 oops, did not work out, please try again 527762306a36Sopenharmony_ci * -1 peer talks different language, 527862306a36Sopenharmony_ci * no point in trying again, please go standalone. 527962306a36Sopenharmony_ci */ 528062306a36Sopenharmony_cistatic int drbd_do_features(struct drbd_connection *connection) 528162306a36Sopenharmony_ci{ 528262306a36Sopenharmony_ci /* ASSERT current == connection->receiver ... 
*/ 528362306a36Sopenharmony_ci struct p_connection_features *p; 528462306a36Sopenharmony_ci const int expect = sizeof(struct p_connection_features); 528562306a36Sopenharmony_ci struct packet_info pi; 528662306a36Sopenharmony_ci int err; 528762306a36Sopenharmony_ci 528862306a36Sopenharmony_ci err = drbd_send_features(connection); 528962306a36Sopenharmony_ci if (err) 529062306a36Sopenharmony_ci return 0; 529162306a36Sopenharmony_ci 529262306a36Sopenharmony_ci err = drbd_recv_header(connection, &pi); 529362306a36Sopenharmony_ci if (err) 529462306a36Sopenharmony_ci return 0; 529562306a36Sopenharmony_ci 529662306a36Sopenharmony_ci if (pi.cmd != P_CONNECTION_FEATURES) { 529762306a36Sopenharmony_ci drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", 529862306a36Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 529962306a36Sopenharmony_ci return -1; 530062306a36Sopenharmony_ci } 530162306a36Sopenharmony_ci 530262306a36Sopenharmony_ci if (pi.size != expect) { 530362306a36Sopenharmony_ci drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n", 530462306a36Sopenharmony_ci expect, pi.size); 530562306a36Sopenharmony_ci return -1; 530662306a36Sopenharmony_ci } 530762306a36Sopenharmony_ci 530862306a36Sopenharmony_ci p = pi.data; 530962306a36Sopenharmony_ci err = drbd_recv_all_warn(connection, p, expect); 531062306a36Sopenharmony_ci if (err) 531162306a36Sopenharmony_ci return 0; 531262306a36Sopenharmony_ci 531362306a36Sopenharmony_ci p->protocol_min = be32_to_cpu(p->protocol_min); 531462306a36Sopenharmony_ci p->protocol_max = be32_to_cpu(p->protocol_max); 531562306a36Sopenharmony_ci if (p->protocol_max == 0) 531662306a36Sopenharmony_ci p->protocol_max = p->protocol_min; 531762306a36Sopenharmony_ci 531862306a36Sopenharmony_ci if (PRO_VERSION_MAX < p->protocol_min || 531962306a36Sopenharmony_ci PRO_VERSION_MIN > p->protocol_max) 532062306a36Sopenharmony_ci goto incompat; 532162306a36Sopenharmony_ci 532262306a36Sopenharmony_ci 
connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); 532362306a36Sopenharmony_ci connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags); 532462306a36Sopenharmony_ci 532562306a36Sopenharmony_ci drbd_info(connection, "Handshake successful: " 532662306a36Sopenharmony_ci "Agreed network protocol version %d\n", connection->agreed_pro_version); 532762306a36Sopenharmony_ci 532862306a36Sopenharmony_ci drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n", 532962306a36Sopenharmony_ci connection->agreed_features, 533062306a36Sopenharmony_ci connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", 533162306a36Sopenharmony_ci connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", 533262306a36Sopenharmony_ci connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "", 533362306a36Sopenharmony_ci connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" : 533462306a36Sopenharmony_ci connection->agreed_features ? 
"" : " none"); 533562306a36Sopenharmony_ci 533662306a36Sopenharmony_ci return 1; 533762306a36Sopenharmony_ci 533862306a36Sopenharmony_ci incompat: 533962306a36Sopenharmony_ci drbd_err(connection, "incompatible DRBD dialects: " 534062306a36Sopenharmony_ci "I support %d-%d, peer supports %d-%d\n", 534162306a36Sopenharmony_ci PRO_VERSION_MIN, PRO_VERSION_MAX, 534262306a36Sopenharmony_ci p->protocol_min, p->protocol_max); 534362306a36Sopenharmony_ci return -1; 534462306a36Sopenharmony_ci} 534562306a36Sopenharmony_ci 534662306a36Sopenharmony_ci#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) 534762306a36Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection) 534862306a36Sopenharmony_ci{ 534962306a36Sopenharmony_ci drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); 535062306a36Sopenharmony_ci drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); 535162306a36Sopenharmony_ci return -1; 535262306a36Sopenharmony_ci} 535362306a36Sopenharmony_ci#else 535462306a36Sopenharmony_ci#define CHALLENGE_LEN 64 535562306a36Sopenharmony_ci 535662306a36Sopenharmony_ci/* Return value: 535762306a36Sopenharmony_ci 1 - auth succeeded, 535862306a36Sopenharmony_ci 0 - failed, try again (network error), 535962306a36Sopenharmony_ci -1 - auth failed, don't try again. 536062306a36Sopenharmony_ci*/ 536162306a36Sopenharmony_ci 536262306a36Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection) 536362306a36Sopenharmony_ci{ 536462306a36Sopenharmony_ci struct drbd_socket *sock; 536562306a36Sopenharmony_ci char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... 
*/ 536662306a36Sopenharmony_ci char *response = NULL; 536762306a36Sopenharmony_ci char *right_response = NULL; 536862306a36Sopenharmony_ci char *peers_ch = NULL; 536962306a36Sopenharmony_ci unsigned int key_len; 537062306a36Sopenharmony_ci char secret[SHARED_SECRET_MAX]; /* 64 byte */ 537162306a36Sopenharmony_ci unsigned int resp_size; 537262306a36Sopenharmony_ci struct shash_desc *desc; 537362306a36Sopenharmony_ci struct packet_info pi; 537462306a36Sopenharmony_ci struct net_conf *nc; 537562306a36Sopenharmony_ci int err, rv; 537662306a36Sopenharmony_ci 537762306a36Sopenharmony_ci /* FIXME: Put the challenge/response into the preallocated socket buffer. */ 537862306a36Sopenharmony_ci 537962306a36Sopenharmony_ci rcu_read_lock(); 538062306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 538162306a36Sopenharmony_ci key_len = strlen(nc->shared_secret); 538262306a36Sopenharmony_ci memcpy(secret, nc->shared_secret, key_len); 538362306a36Sopenharmony_ci rcu_read_unlock(); 538462306a36Sopenharmony_ci 538562306a36Sopenharmony_ci desc = kmalloc(sizeof(struct shash_desc) + 538662306a36Sopenharmony_ci crypto_shash_descsize(connection->cram_hmac_tfm), 538762306a36Sopenharmony_ci GFP_KERNEL); 538862306a36Sopenharmony_ci if (!desc) { 538962306a36Sopenharmony_ci rv = -1; 539062306a36Sopenharmony_ci goto fail; 539162306a36Sopenharmony_ci } 539262306a36Sopenharmony_ci desc->tfm = connection->cram_hmac_tfm; 539362306a36Sopenharmony_ci 539462306a36Sopenharmony_ci rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); 539562306a36Sopenharmony_ci if (rv) { 539662306a36Sopenharmony_ci drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv); 539762306a36Sopenharmony_ci rv = -1; 539862306a36Sopenharmony_ci goto fail; 539962306a36Sopenharmony_ci } 540062306a36Sopenharmony_ci 540162306a36Sopenharmony_ci get_random_bytes(my_challenge, CHALLENGE_LEN); 540262306a36Sopenharmony_ci 540362306a36Sopenharmony_ci sock = &connection->data; 
540462306a36Sopenharmony_ci if (!conn_prepare_command(connection, sock)) { 540562306a36Sopenharmony_ci rv = 0; 540662306a36Sopenharmony_ci goto fail; 540762306a36Sopenharmony_ci } 540862306a36Sopenharmony_ci rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0, 540962306a36Sopenharmony_ci my_challenge, CHALLENGE_LEN); 541062306a36Sopenharmony_ci if (!rv) 541162306a36Sopenharmony_ci goto fail; 541262306a36Sopenharmony_ci 541362306a36Sopenharmony_ci err = drbd_recv_header(connection, &pi); 541462306a36Sopenharmony_ci if (err) { 541562306a36Sopenharmony_ci rv = 0; 541662306a36Sopenharmony_ci goto fail; 541762306a36Sopenharmony_ci } 541862306a36Sopenharmony_ci 541962306a36Sopenharmony_ci if (pi.cmd != P_AUTH_CHALLENGE) { 542062306a36Sopenharmony_ci drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 542162306a36Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 542262306a36Sopenharmony_ci rv = -1; 542362306a36Sopenharmony_ci goto fail; 542462306a36Sopenharmony_ci } 542562306a36Sopenharmony_ci 542662306a36Sopenharmony_ci if (pi.size > CHALLENGE_LEN * 2) { 542762306a36Sopenharmony_ci drbd_err(connection, "expected AuthChallenge payload too big.\n"); 542862306a36Sopenharmony_ci rv = -1; 542962306a36Sopenharmony_ci goto fail; 543062306a36Sopenharmony_ci } 543162306a36Sopenharmony_ci 543262306a36Sopenharmony_ci if (pi.size < CHALLENGE_LEN) { 543362306a36Sopenharmony_ci drbd_err(connection, "AuthChallenge payload too small.\n"); 543462306a36Sopenharmony_ci rv = -1; 543562306a36Sopenharmony_ci goto fail; 543662306a36Sopenharmony_ci } 543762306a36Sopenharmony_ci 543862306a36Sopenharmony_ci peers_ch = kmalloc(pi.size, GFP_NOIO); 543962306a36Sopenharmony_ci if (!peers_ch) { 544062306a36Sopenharmony_ci rv = -1; 544162306a36Sopenharmony_ci goto fail; 544262306a36Sopenharmony_ci } 544362306a36Sopenharmony_ci 544462306a36Sopenharmony_ci err = drbd_recv_all_warn(connection, peers_ch, pi.size); 544562306a36Sopenharmony_ci if (err) { 
544662306a36Sopenharmony_ci rv = 0; 544762306a36Sopenharmony_ci goto fail; 544862306a36Sopenharmony_ci } 544962306a36Sopenharmony_ci 545062306a36Sopenharmony_ci if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) { 545162306a36Sopenharmony_ci drbd_err(connection, "Peer presented the same challenge!\n"); 545262306a36Sopenharmony_ci rv = -1; 545362306a36Sopenharmony_ci goto fail; 545462306a36Sopenharmony_ci } 545562306a36Sopenharmony_ci 545662306a36Sopenharmony_ci resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm); 545762306a36Sopenharmony_ci response = kmalloc(resp_size, GFP_NOIO); 545862306a36Sopenharmony_ci if (!response) { 545962306a36Sopenharmony_ci rv = -1; 546062306a36Sopenharmony_ci goto fail; 546162306a36Sopenharmony_ci } 546262306a36Sopenharmony_ci 546362306a36Sopenharmony_ci rv = crypto_shash_digest(desc, peers_ch, pi.size, response); 546462306a36Sopenharmony_ci if (rv) { 546562306a36Sopenharmony_ci drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 546662306a36Sopenharmony_ci rv = -1; 546762306a36Sopenharmony_ci goto fail; 546862306a36Sopenharmony_ci } 546962306a36Sopenharmony_ci 547062306a36Sopenharmony_ci if (!conn_prepare_command(connection, sock)) { 547162306a36Sopenharmony_ci rv = 0; 547262306a36Sopenharmony_ci goto fail; 547362306a36Sopenharmony_ci } 547462306a36Sopenharmony_ci rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0, 547562306a36Sopenharmony_ci response, resp_size); 547662306a36Sopenharmony_ci if (!rv) 547762306a36Sopenharmony_ci goto fail; 547862306a36Sopenharmony_ci 547962306a36Sopenharmony_ci err = drbd_recv_header(connection, &pi); 548062306a36Sopenharmony_ci if (err) { 548162306a36Sopenharmony_ci rv = 0; 548262306a36Sopenharmony_ci goto fail; 548362306a36Sopenharmony_ci } 548462306a36Sopenharmony_ci 548562306a36Sopenharmony_ci if (pi.cmd != P_AUTH_RESPONSE) { 548662306a36Sopenharmony_ci drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n", 548762306a36Sopenharmony_ci 
cmdname(pi.cmd), pi.cmd); 548862306a36Sopenharmony_ci rv = 0; 548962306a36Sopenharmony_ci goto fail; 549062306a36Sopenharmony_ci } 549162306a36Sopenharmony_ci 549262306a36Sopenharmony_ci if (pi.size != resp_size) { 549362306a36Sopenharmony_ci drbd_err(connection, "expected AuthResponse payload of wrong size\n"); 549462306a36Sopenharmony_ci rv = 0; 549562306a36Sopenharmony_ci goto fail; 549662306a36Sopenharmony_ci } 549762306a36Sopenharmony_ci 549862306a36Sopenharmony_ci err = drbd_recv_all_warn(connection, response , resp_size); 549962306a36Sopenharmony_ci if (err) { 550062306a36Sopenharmony_ci rv = 0; 550162306a36Sopenharmony_ci goto fail; 550262306a36Sopenharmony_ci } 550362306a36Sopenharmony_ci 550462306a36Sopenharmony_ci right_response = kmalloc(resp_size, GFP_NOIO); 550562306a36Sopenharmony_ci if (!right_response) { 550662306a36Sopenharmony_ci rv = -1; 550762306a36Sopenharmony_ci goto fail; 550862306a36Sopenharmony_ci } 550962306a36Sopenharmony_ci 551062306a36Sopenharmony_ci rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN, 551162306a36Sopenharmony_ci right_response); 551262306a36Sopenharmony_ci if (rv) { 551362306a36Sopenharmony_ci drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 551462306a36Sopenharmony_ci rv = -1; 551562306a36Sopenharmony_ci goto fail; 551662306a36Sopenharmony_ci } 551762306a36Sopenharmony_ci 551862306a36Sopenharmony_ci rv = !memcmp(response, right_response, resp_size); 551962306a36Sopenharmony_ci 552062306a36Sopenharmony_ci if (rv) 552162306a36Sopenharmony_ci drbd_info(connection, "Peer authenticated using %d bytes HMAC\n", 552262306a36Sopenharmony_ci resp_size); 552362306a36Sopenharmony_ci else 552462306a36Sopenharmony_ci rv = -1; 552562306a36Sopenharmony_ci 552662306a36Sopenharmony_ci fail: 552762306a36Sopenharmony_ci kfree(peers_ch); 552862306a36Sopenharmony_ci kfree(response); 552962306a36Sopenharmony_ci kfree(right_response); 553062306a36Sopenharmony_ci if (desc) { 553162306a36Sopenharmony_ci 
shash_desc_zero(desc); 553262306a36Sopenharmony_ci kfree(desc); 553362306a36Sopenharmony_ci } 553462306a36Sopenharmony_ci 553562306a36Sopenharmony_ci return rv; 553662306a36Sopenharmony_ci} 553762306a36Sopenharmony_ci#endif 553862306a36Sopenharmony_ci 553962306a36Sopenharmony_ciint drbd_receiver(struct drbd_thread *thi) 554062306a36Sopenharmony_ci{ 554162306a36Sopenharmony_ci struct drbd_connection *connection = thi->connection; 554262306a36Sopenharmony_ci int h; 554362306a36Sopenharmony_ci 554462306a36Sopenharmony_ci drbd_info(connection, "receiver (re)started\n"); 554562306a36Sopenharmony_ci 554662306a36Sopenharmony_ci do { 554762306a36Sopenharmony_ci h = conn_connect(connection); 554862306a36Sopenharmony_ci if (h == 0) { 554962306a36Sopenharmony_ci conn_disconnect(connection); 555062306a36Sopenharmony_ci schedule_timeout_interruptible(HZ); 555162306a36Sopenharmony_ci } 555262306a36Sopenharmony_ci if (h == -1) { 555362306a36Sopenharmony_ci drbd_warn(connection, "Discarding network configuration.\n"); 555462306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 555562306a36Sopenharmony_ci } 555662306a36Sopenharmony_ci } while (h == 0); 555762306a36Sopenharmony_ci 555862306a36Sopenharmony_ci if (h > 0) { 555962306a36Sopenharmony_ci blk_start_plug(&connection->receiver_plug); 556062306a36Sopenharmony_ci drbdd(connection); 556162306a36Sopenharmony_ci blk_finish_plug(&connection->receiver_plug); 556262306a36Sopenharmony_ci } 556362306a36Sopenharmony_ci 556462306a36Sopenharmony_ci conn_disconnect(connection); 556562306a36Sopenharmony_ci 556662306a36Sopenharmony_ci drbd_info(connection, "receiver terminated\n"); 556762306a36Sopenharmony_ci return 0; 556862306a36Sopenharmony_ci} 556962306a36Sopenharmony_ci 557062306a36Sopenharmony_ci/* ********* acknowledge sender ******** */ 557162306a36Sopenharmony_ci 557262306a36Sopenharmony_cistatic int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 
557362306a36Sopenharmony_ci{ 557462306a36Sopenharmony_ci struct p_req_state_reply *p = pi->data; 557562306a36Sopenharmony_ci int retcode = be32_to_cpu(p->retcode); 557662306a36Sopenharmony_ci 557762306a36Sopenharmony_ci if (retcode >= SS_SUCCESS) { 557862306a36Sopenharmony_ci set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags); 557962306a36Sopenharmony_ci } else { 558062306a36Sopenharmony_ci set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags); 558162306a36Sopenharmony_ci drbd_err(connection, "Requested state change failed by peer: %s (%d)\n", 558262306a36Sopenharmony_ci drbd_set_st_err_str(retcode), retcode); 558362306a36Sopenharmony_ci } 558462306a36Sopenharmony_ci wake_up(&connection->ping_wait); 558562306a36Sopenharmony_ci 558662306a36Sopenharmony_ci return 0; 558762306a36Sopenharmony_ci} 558862306a36Sopenharmony_ci 558962306a36Sopenharmony_cistatic int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 559062306a36Sopenharmony_ci{ 559162306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 559262306a36Sopenharmony_ci struct drbd_device *device; 559362306a36Sopenharmony_ci struct p_req_state_reply *p = pi->data; 559462306a36Sopenharmony_ci int retcode = be32_to_cpu(p->retcode); 559562306a36Sopenharmony_ci 559662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 559762306a36Sopenharmony_ci if (!peer_device) 559862306a36Sopenharmony_ci return -EIO; 559962306a36Sopenharmony_ci device = peer_device->device; 560062306a36Sopenharmony_ci 560162306a36Sopenharmony_ci if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) { 560262306a36Sopenharmony_ci D_ASSERT(device, connection->agreed_pro_version < 100); 560362306a36Sopenharmony_ci return got_conn_RqSReply(connection, pi); 560462306a36Sopenharmony_ci } 560562306a36Sopenharmony_ci 560662306a36Sopenharmony_ci if (retcode >= SS_SUCCESS) { 560762306a36Sopenharmony_ci set_bit(CL_ST_CHG_SUCCESS, &device->flags); 560862306a36Sopenharmony_ci } else { 560962306a36Sopenharmony_ci 
set_bit(CL_ST_CHG_FAIL, &device->flags); 561062306a36Sopenharmony_ci drbd_err(device, "Requested state change failed by peer: %s (%d)\n", 561162306a36Sopenharmony_ci drbd_set_st_err_str(retcode), retcode); 561262306a36Sopenharmony_ci } 561362306a36Sopenharmony_ci wake_up(&device->state_wait); 561462306a36Sopenharmony_ci 561562306a36Sopenharmony_ci return 0; 561662306a36Sopenharmony_ci} 561762306a36Sopenharmony_ci 561862306a36Sopenharmony_cistatic int got_Ping(struct drbd_connection *connection, struct packet_info *pi) 561962306a36Sopenharmony_ci{ 562062306a36Sopenharmony_ci return drbd_send_ping_ack(connection); 562162306a36Sopenharmony_ci 562262306a36Sopenharmony_ci} 562362306a36Sopenharmony_ci 562462306a36Sopenharmony_cistatic int got_PingAck(struct drbd_connection *connection, struct packet_info *pi) 562562306a36Sopenharmony_ci{ 562662306a36Sopenharmony_ci /* restore idle timeout */ 562762306a36Sopenharmony_ci connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ; 562862306a36Sopenharmony_ci if (!test_and_set_bit(GOT_PING_ACK, &connection->flags)) 562962306a36Sopenharmony_ci wake_up(&connection->ping_wait); 563062306a36Sopenharmony_ci 563162306a36Sopenharmony_ci return 0; 563262306a36Sopenharmony_ci} 563362306a36Sopenharmony_ci 563462306a36Sopenharmony_cistatic int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi) 563562306a36Sopenharmony_ci{ 563662306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 563762306a36Sopenharmony_ci struct drbd_device *device; 563862306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 563962306a36Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 564062306a36Sopenharmony_ci int blksize = be32_to_cpu(p->blksize); 564162306a36Sopenharmony_ci 564262306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 564362306a36Sopenharmony_ci if (!peer_device) 564462306a36Sopenharmony_ci return -EIO; 564562306a36Sopenharmony_ci device = peer_device->device; 
564662306a36Sopenharmony_ci 564762306a36Sopenharmony_ci D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 564862306a36Sopenharmony_ci 564962306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 565062306a36Sopenharmony_ci 565162306a36Sopenharmony_ci if (get_ldev(device)) { 565262306a36Sopenharmony_ci drbd_rs_complete_io(device, sector); 565362306a36Sopenharmony_ci drbd_set_in_sync(peer_device, sector, blksize); 565462306a36Sopenharmony_ci /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 565562306a36Sopenharmony_ci device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 565662306a36Sopenharmony_ci put_ldev(device); 565762306a36Sopenharmony_ci } 565862306a36Sopenharmony_ci dec_rs_pending(peer_device); 565962306a36Sopenharmony_ci atomic_add(blksize >> 9, &device->rs_sect_in); 566062306a36Sopenharmony_ci 566162306a36Sopenharmony_ci return 0; 566262306a36Sopenharmony_ci} 566362306a36Sopenharmony_ci 566462306a36Sopenharmony_cistatic int 566562306a36Sopenharmony_civalidate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 566662306a36Sopenharmony_ci struct rb_root *root, const char *func, 566762306a36Sopenharmony_ci enum drbd_req_event what, bool missing_ok) 566862306a36Sopenharmony_ci{ 566962306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 567062306a36Sopenharmony_ci struct drbd_request *req; 567162306a36Sopenharmony_ci struct bio_and_error m; 567262306a36Sopenharmony_ci 567362306a36Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 567462306a36Sopenharmony_ci req = find_request(device, root, id, sector, missing_ok, func); 567562306a36Sopenharmony_ci if (unlikely(!req)) { 567662306a36Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 567762306a36Sopenharmony_ci return -EIO; 567862306a36Sopenharmony_ci } 567962306a36Sopenharmony_ci __req_mod(req, what, peer_device, &m); 568062306a36Sopenharmony_ci 
spin_unlock_irq(&device->resource->req_lock); 568162306a36Sopenharmony_ci 568262306a36Sopenharmony_ci if (m.bio) 568362306a36Sopenharmony_ci complete_master_bio(device, &m); 568462306a36Sopenharmony_ci return 0; 568562306a36Sopenharmony_ci} 568662306a36Sopenharmony_ci 568762306a36Sopenharmony_cistatic int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi) 568862306a36Sopenharmony_ci{ 568962306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 569062306a36Sopenharmony_ci struct drbd_device *device; 569162306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 569262306a36Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 569362306a36Sopenharmony_ci int blksize = be32_to_cpu(p->blksize); 569462306a36Sopenharmony_ci enum drbd_req_event what; 569562306a36Sopenharmony_ci 569662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 569762306a36Sopenharmony_ci if (!peer_device) 569862306a36Sopenharmony_ci return -EIO; 569962306a36Sopenharmony_ci device = peer_device->device; 570062306a36Sopenharmony_ci 570162306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 570262306a36Sopenharmony_ci 570362306a36Sopenharmony_ci if (p->block_id == ID_SYNCER) { 570462306a36Sopenharmony_ci drbd_set_in_sync(peer_device, sector, blksize); 570562306a36Sopenharmony_ci dec_rs_pending(peer_device); 570662306a36Sopenharmony_ci return 0; 570762306a36Sopenharmony_ci } 570862306a36Sopenharmony_ci switch (pi->cmd) { 570962306a36Sopenharmony_ci case P_RS_WRITE_ACK: 571062306a36Sopenharmony_ci what = WRITE_ACKED_BY_PEER_AND_SIS; 571162306a36Sopenharmony_ci break; 571262306a36Sopenharmony_ci case P_WRITE_ACK: 571362306a36Sopenharmony_ci what = WRITE_ACKED_BY_PEER; 571462306a36Sopenharmony_ci break; 571562306a36Sopenharmony_ci case P_RECV_ACK: 571662306a36Sopenharmony_ci what = RECV_ACKED_BY_PEER; 571762306a36Sopenharmony_ci break; 571862306a36Sopenharmony_ci case P_SUPERSEDED: 571962306a36Sopenharmony_ci what = 
CONFLICT_RESOLVED; 572062306a36Sopenharmony_ci break; 572162306a36Sopenharmony_ci case P_RETRY_WRITE: 572262306a36Sopenharmony_ci what = POSTPONE_WRITE; 572362306a36Sopenharmony_ci break; 572462306a36Sopenharmony_ci default: 572562306a36Sopenharmony_ci BUG(); 572662306a36Sopenharmony_ci } 572762306a36Sopenharmony_ci 572862306a36Sopenharmony_ci return validate_req_change_req_state(peer_device, p->block_id, sector, 572962306a36Sopenharmony_ci &device->write_requests, __func__, 573062306a36Sopenharmony_ci what, false); 573162306a36Sopenharmony_ci} 573262306a36Sopenharmony_ci 573362306a36Sopenharmony_cistatic int got_NegAck(struct drbd_connection *connection, struct packet_info *pi) 573462306a36Sopenharmony_ci{ 573562306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 573662306a36Sopenharmony_ci struct drbd_device *device; 573762306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 573862306a36Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 573962306a36Sopenharmony_ci int size = be32_to_cpu(p->blksize); 574062306a36Sopenharmony_ci int err; 574162306a36Sopenharmony_ci 574262306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 574362306a36Sopenharmony_ci if (!peer_device) 574462306a36Sopenharmony_ci return -EIO; 574562306a36Sopenharmony_ci device = peer_device->device; 574662306a36Sopenharmony_ci 574762306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 574862306a36Sopenharmony_ci 574962306a36Sopenharmony_ci if (p->block_id == ID_SYNCER) { 575062306a36Sopenharmony_ci dec_rs_pending(peer_device); 575162306a36Sopenharmony_ci drbd_rs_failed_io(peer_device, sector, size); 575262306a36Sopenharmony_ci return 0; 575362306a36Sopenharmony_ci } 575462306a36Sopenharmony_ci 575562306a36Sopenharmony_ci err = validate_req_change_req_state(peer_device, p->block_id, sector, 575662306a36Sopenharmony_ci &device->write_requests, __func__, 575762306a36Sopenharmony_ci NEG_ACKED, true); 575862306a36Sopenharmony_ci if (err) { 
575962306a36Sopenharmony_ci /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 576062306a36Sopenharmony_ci The master bio might already be completed, therefore the 576162306a36Sopenharmony_ci request is no longer in the collision hash. */ 576262306a36Sopenharmony_ci /* In Protocol B we might already have got a P_RECV_ACK 576362306a36Sopenharmony_ci but then get a P_NEG_ACK afterwards. */ 576462306a36Sopenharmony_ci drbd_set_out_of_sync(peer_device, sector, size); 576562306a36Sopenharmony_ci } 576662306a36Sopenharmony_ci return 0; 576762306a36Sopenharmony_ci} 576862306a36Sopenharmony_ci 576962306a36Sopenharmony_cistatic int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) 577062306a36Sopenharmony_ci{ 577162306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 577262306a36Sopenharmony_ci struct drbd_device *device; 577362306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 577462306a36Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 577562306a36Sopenharmony_ci 577662306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 577762306a36Sopenharmony_ci if (!peer_device) 577862306a36Sopenharmony_ci return -EIO; 577962306a36Sopenharmony_ci device = peer_device->device; 578062306a36Sopenharmony_ci 578162306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 578262306a36Sopenharmony_ci 578362306a36Sopenharmony_ci drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", 578462306a36Sopenharmony_ci (unsigned long long)sector, be32_to_cpu(p->blksize)); 578562306a36Sopenharmony_ci 578662306a36Sopenharmony_ci return validate_req_change_req_state(peer_device, p->block_id, sector, 578762306a36Sopenharmony_ci &device->read_requests, __func__, 578862306a36Sopenharmony_ci NEG_ACKED, false); 578962306a36Sopenharmony_ci} 579062306a36Sopenharmony_ci 579162306a36Sopenharmony_cistatic int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi) 579262306a36Sopenharmony_ci{ 
579362306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 579462306a36Sopenharmony_ci struct drbd_device *device; 579562306a36Sopenharmony_ci sector_t sector; 579662306a36Sopenharmony_ci int size; 579762306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 579862306a36Sopenharmony_ci 579962306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 580062306a36Sopenharmony_ci if (!peer_device) 580162306a36Sopenharmony_ci return -EIO; 580262306a36Sopenharmony_ci device = peer_device->device; 580362306a36Sopenharmony_ci 580462306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 580562306a36Sopenharmony_ci size = be32_to_cpu(p->blksize); 580662306a36Sopenharmony_ci 580762306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 580862306a36Sopenharmony_ci 580962306a36Sopenharmony_ci dec_rs_pending(peer_device); 581062306a36Sopenharmony_ci 581162306a36Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { 581262306a36Sopenharmony_ci drbd_rs_complete_io(device, sector); 581362306a36Sopenharmony_ci switch (pi->cmd) { 581462306a36Sopenharmony_ci case P_NEG_RS_DREPLY: 581562306a36Sopenharmony_ci drbd_rs_failed_io(peer_device, sector, size); 581662306a36Sopenharmony_ci break; 581762306a36Sopenharmony_ci case P_RS_CANCEL: 581862306a36Sopenharmony_ci break; 581962306a36Sopenharmony_ci default: 582062306a36Sopenharmony_ci BUG(); 582162306a36Sopenharmony_ci } 582262306a36Sopenharmony_ci put_ldev(device); 582362306a36Sopenharmony_ci } 582462306a36Sopenharmony_ci 582562306a36Sopenharmony_ci return 0; 582662306a36Sopenharmony_ci} 582762306a36Sopenharmony_ci 582862306a36Sopenharmony_cistatic int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) 582962306a36Sopenharmony_ci{ 583062306a36Sopenharmony_ci struct p_barrier_ack *p = pi->data; 583162306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 583262306a36Sopenharmony_ci int vnr; 583362306a36Sopenharmony_ci 583462306a36Sopenharmony_ci 
tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); 583562306a36Sopenharmony_ci 583662306a36Sopenharmony_ci rcu_read_lock(); 583762306a36Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 583862306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 583962306a36Sopenharmony_ci 584062306a36Sopenharmony_ci if (device->state.conn == C_AHEAD && 584162306a36Sopenharmony_ci atomic_read(&device->ap_in_flight) == 0 && 584262306a36Sopenharmony_ci !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { 584362306a36Sopenharmony_ci device->start_resync_timer.expires = jiffies + HZ; 584462306a36Sopenharmony_ci add_timer(&device->start_resync_timer); 584562306a36Sopenharmony_ci } 584662306a36Sopenharmony_ci } 584762306a36Sopenharmony_ci rcu_read_unlock(); 584862306a36Sopenharmony_ci 584962306a36Sopenharmony_ci return 0; 585062306a36Sopenharmony_ci} 585162306a36Sopenharmony_ci 585262306a36Sopenharmony_cistatic int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) 585362306a36Sopenharmony_ci{ 585462306a36Sopenharmony_ci struct drbd_peer_device *peer_device; 585562306a36Sopenharmony_ci struct drbd_device *device; 585662306a36Sopenharmony_ci struct p_block_ack *p = pi->data; 585762306a36Sopenharmony_ci struct drbd_device_work *dw; 585862306a36Sopenharmony_ci sector_t sector; 585962306a36Sopenharmony_ci int size; 586062306a36Sopenharmony_ci 586162306a36Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 586262306a36Sopenharmony_ci if (!peer_device) 586362306a36Sopenharmony_ci return -EIO; 586462306a36Sopenharmony_ci device = peer_device->device; 586562306a36Sopenharmony_ci 586662306a36Sopenharmony_ci sector = be64_to_cpu(p->sector); 586762306a36Sopenharmony_ci size = be32_to_cpu(p->blksize); 586862306a36Sopenharmony_ci 586962306a36Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 587062306a36Sopenharmony_ci 587162306a36Sopenharmony_ci if (be64_to_cpu(p->block_id) 
== ID_OUT_OF_SYNC) 587262306a36Sopenharmony_ci drbd_ov_out_of_sync_found(peer_device, sector, size); 587362306a36Sopenharmony_ci else 587462306a36Sopenharmony_ci ov_out_of_sync_print(peer_device); 587562306a36Sopenharmony_ci 587662306a36Sopenharmony_ci if (!get_ldev(device)) 587762306a36Sopenharmony_ci return 0; 587862306a36Sopenharmony_ci 587962306a36Sopenharmony_ci drbd_rs_complete_io(device, sector); 588062306a36Sopenharmony_ci dec_rs_pending(peer_device); 588162306a36Sopenharmony_ci 588262306a36Sopenharmony_ci --device->ov_left; 588362306a36Sopenharmony_ci 588462306a36Sopenharmony_ci /* let's advance progress step marks only for every other megabyte */ 588562306a36Sopenharmony_ci if ((device->ov_left & 0x200) == 0x200) 588662306a36Sopenharmony_ci drbd_advance_rs_marks(peer_device, device->ov_left); 588762306a36Sopenharmony_ci 588862306a36Sopenharmony_ci if (device->ov_left == 0) { 588962306a36Sopenharmony_ci dw = kmalloc(sizeof(*dw), GFP_NOIO); 589062306a36Sopenharmony_ci if (dw) { 589162306a36Sopenharmony_ci dw->w.cb = w_ov_finished; 589262306a36Sopenharmony_ci dw->device = device; 589362306a36Sopenharmony_ci drbd_queue_work(&peer_device->connection->sender_work, &dw->w); 589462306a36Sopenharmony_ci } else { 589562306a36Sopenharmony_ci drbd_err(device, "kmalloc(dw) failed."); 589662306a36Sopenharmony_ci ov_out_of_sync_print(peer_device); 589762306a36Sopenharmony_ci drbd_resync_finished(peer_device); 589862306a36Sopenharmony_ci } 589962306a36Sopenharmony_ci } 590062306a36Sopenharmony_ci put_ldev(device); 590162306a36Sopenharmony_ci return 0; 590262306a36Sopenharmony_ci} 590362306a36Sopenharmony_ci 590462306a36Sopenharmony_cistatic int got_skip(struct drbd_connection *connection, struct packet_info *pi) 590562306a36Sopenharmony_ci{ 590662306a36Sopenharmony_ci return 0; 590762306a36Sopenharmony_ci} 590862306a36Sopenharmony_ci 590962306a36Sopenharmony_cistruct meta_sock_cmd { 591062306a36Sopenharmony_ci size_t pkt_size; 591162306a36Sopenharmony_ci int (*fn)(struct 
drbd_connection *connection, struct packet_info *); 591262306a36Sopenharmony_ci}; 591362306a36Sopenharmony_ci 591462306a36Sopenharmony_cistatic void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout) 591562306a36Sopenharmony_ci{ 591662306a36Sopenharmony_ci long t; 591762306a36Sopenharmony_ci struct net_conf *nc; 591862306a36Sopenharmony_ci 591962306a36Sopenharmony_ci rcu_read_lock(); 592062306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 592162306a36Sopenharmony_ci t = ping_timeout ? nc->ping_timeo : nc->ping_int; 592262306a36Sopenharmony_ci rcu_read_unlock(); 592362306a36Sopenharmony_ci 592462306a36Sopenharmony_ci t *= HZ; 592562306a36Sopenharmony_ci if (ping_timeout) 592662306a36Sopenharmony_ci t /= 10; 592762306a36Sopenharmony_ci 592862306a36Sopenharmony_ci connection->meta.socket->sk->sk_rcvtimeo = t; 592962306a36Sopenharmony_ci} 593062306a36Sopenharmony_ci 593162306a36Sopenharmony_cistatic void set_ping_timeout(struct drbd_connection *connection) 593262306a36Sopenharmony_ci{ 593362306a36Sopenharmony_ci set_rcvtimeo(connection, 1); 593462306a36Sopenharmony_ci} 593562306a36Sopenharmony_ci 593662306a36Sopenharmony_cistatic void set_idle_timeout(struct drbd_connection *connection) 593762306a36Sopenharmony_ci{ 593862306a36Sopenharmony_ci set_rcvtimeo(connection, 0); 593962306a36Sopenharmony_ci} 594062306a36Sopenharmony_ci 594162306a36Sopenharmony_cistatic struct meta_sock_cmd ack_receiver_tbl[] = { 594262306a36Sopenharmony_ci [P_PING] = { 0, got_Ping }, 594362306a36Sopenharmony_ci [P_PING_ACK] = { 0, got_PingAck }, 594462306a36Sopenharmony_ci [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 594562306a36Sopenharmony_ci [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 594662306a36Sopenharmony_ci [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 594762306a36Sopenharmony_ci [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck }, 594862306a36Sopenharmony_ci [P_NEG_ACK] = { sizeof(struct 
p_block_ack), got_NegAck }, 594962306a36Sopenharmony_ci [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 595062306a36Sopenharmony_ci [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, 595162306a36Sopenharmony_ci [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 595262306a36Sopenharmony_ci [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 595362306a36Sopenharmony_ci [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 595462306a36Sopenharmony_ci [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 595562306a36Sopenharmony_ci [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 595662306a36Sopenharmony_ci [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, 595762306a36Sopenharmony_ci [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, 595862306a36Sopenharmony_ci [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, 595962306a36Sopenharmony_ci}; 596062306a36Sopenharmony_ci 596162306a36Sopenharmony_ciint drbd_ack_receiver(struct drbd_thread *thi) 596262306a36Sopenharmony_ci{ 596362306a36Sopenharmony_ci struct drbd_connection *connection = thi->connection; 596462306a36Sopenharmony_ci struct meta_sock_cmd *cmd = NULL; 596562306a36Sopenharmony_ci struct packet_info pi; 596662306a36Sopenharmony_ci unsigned long pre_recv_jif; 596762306a36Sopenharmony_ci int rv; 596862306a36Sopenharmony_ci void *buf = connection->meta.rbuf; 596962306a36Sopenharmony_ci int received = 0; 597062306a36Sopenharmony_ci unsigned int header_size = drbd_header_size(connection); 597162306a36Sopenharmony_ci int expect = header_size; 597262306a36Sopenharmony_ci bool ping_timeout_active = false; 597362306a36Sopenharmony_ci 597462306a36Sopenharmony_ci sched_set_fifo_low(current); 597562306a36Sopenharmony_ci 597662306a36Sopenharmony_ci while (get_t_state(thi) == RUNNING) { 597762306a36Sopenharmony_ci drbd_thread_current_set_cpu(thi); 
597862306a36Sopenharmony_ci 597962306a36Sopenharmony_ci conn_reclaim_net_peer_reqs(connection); 598062306a36Sopenharmony_ci 598162306a36Sopenharmony_ci if (test_and_clear_bit(SEND_PING, &connection->flags)) { 598262306a36Sopenharmony_ci if (drbd_send_ping(connection)) { 598362306a36Sopenharmony_ci drbd_err(connection, "drbd_send_ping has failed\n"); 598462306a36Sopenharmony_ci goto reconnect; 598562306a36Sopenharmony_ci } 598662306a36Sopenharmony_ci set_ping_timeout(connection); 598762306a36Sopenharmony_ci ping_timeout_active = true; 598862306a36Sopenharmony_ci } 598962306a36Sopenharmony_ci 599062306a36Sopenharmony_ci pre_recv_jif = jiffies; 599162306a36Sopenharmony_ci rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); 599262306a36Sopenharmony_ci 599362306a36Sopenharmony_ci /* Note: 599462306a36Sopenharmony_ci * -EINTR (on meta) we got a signal 599562306a36Sopenharmony_ci * -EAGAIN (on meta) rcvtimeo expired 599662306a36Sopenharmony_ci * -ECONNRESET other side closed the connection 599762306a36Sopenharmony_ci * -ERESTARTSYS (on data) we got a signal 599862306a36Sopenharmony_ci * rv < 0 other than above: unexpected error! 
599962306a36Sopenharmony_ci * rv == expected: full header or command 600062306a36Sopenharmony_ci * rv < expected: "woken" by signal during receive 600162306a36Sopenharmony_ci * rv == 0 : "connection shut down by peer" 600262306a36Sopenharmony_ci */ 600362306a36Sopenharmony_ci if (likely(rv > 0)) { 600462306a36Sopenharmony_ci received += rv; 600562306a36Sopenharmony_ci buf += rv; 600662306a36Sopenharmony_ci } else if (rv == 0) { 600762306a36Sopenharmony_ci if (test_bit(DISCONNECT_SENT, &connection->flags)) { 600862306a36Sopenharmony_ci long t; 600962306a36Sopenharmony_ci rcu_read_lock(); 601062306a36Sopenharmony_ci t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 601162306a36Sopenharmony_ci rcu_read_unlock(); 601262306a36Sopenharmony_ci 601362306a36Sopenharmony_ci t = wait_event_timeout(connection->ping_wait, 601462306a36Sopenharmony_ci connection->cstate < C_WF_REPORT_PARAMS, 601562306a36Sopenharmony_ci t); 601662306a36Sopenharmony_ci if (t) 601762306a36Sopenharmony_ci break; 601862306a36Sopenharmony_ci } 601962306a36Sopenharmony_ci drbd_err(connection, "meta connection shut down by peer.\n"); 602062306a36Sopenharmony_ci goto reconnect; 602162306a36Sopenharmony_ci } else if (rv == -EAGAIN) { 602262306a36Sopenharmony_ci /* If the data socket received something meanwhile, 602362306a36Sopenharmony_ci * that is good enough: peer is still alive. */ 602462306a36Sopenharmony_ci if (time_after(connection->last_received, pre_recv_jif)) 602562306a36Sopenharmony_ci continue; 602662306a36Sopenharmony_ci if (ping_timeout_active) { 602762306a36Sopenharmony_ci drbd_err(connection, "PingAck did not arrive in time.\n"); 602862306a36Sopenharmony_ci goto reconnect; 602962306a36Sopenharmony_ci } 603062306a36Sopenharmony_ci set_bit(SEND_PING, &connection->flags); 603162306a36Sopenharmony_ci continue; 603262306a36Sopenharmony_ci } else if (rv == -EINTR) { 603362306a36Sopenharmony_ci /* maybe drbd_thread_stop(): the while condition will notice. 
603462306a36Sopenharmony_ci * maybe woken for send_ping: we'll send a ping above, 603562306a36Sopenharmony_ci * and change the rcvtimeo */ 603662306a36Sopenharmony_ci flush_signals(current); 603762306a36Sopenharmony_ci continue; 603862306a36Sopenharmony_ci } else { 603962306a36Sopenharmony_ci drbd_err(connection, "sock_recvmsg returned %d\n", rv); 604062306a36Sopenharmony_ci goto reconnect; 604162306a36Sopenharmony_ci } 604262306a36Sopenharmony_ci 604362306a36Sopenharmony_ci if (received == expect && cmd == NULL) { 604462306a36Sopenharmony_ci if (decode_header(connection, connection->meta.rbuf, &pi)) 604562306a36Sopenharmony_ci goto reconnect; 604662306a36Sopenharmony_ci cmd = &ack_receiver_tbl[pi.cmd]; 604762306a36Sopenharmony_ci if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) { 604862306a36Sopenharmony_ci drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", 604962306a36Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 605062306a36Sopenharmony_ci goto disconnect; 605162306a36Sopenharmony_ci } 605262306a36Sopenharmony_ci expect = header_size + cmd->pkt_size; 605362306a36Sopenharmony_ci if (pi.size != expect - header_size) { 605462306a36Sopenharmony_ci drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", 605562306a36Sopenharmony_ci pi.cmd, pi.size); 605662306a36Sopenharmony_ci goto reconnect; 605762306a36Sopenharmony_ci } 605862306a36Sopenharmony_ci } 605962306a36Sopenharmony_ci if (received == expect) { 606062306a36Sopenharmony_ci bool err; 606162306a36Sopenharmony_ci 606262306a36Sopenharmony_ci err = cmd->fn(connection, &pi); 606362306a36Sopenharmony_ci if (err) { 606462306a36Sopenharmony_ci drbd_err(connection, "%ps failed\n", cmd->fn); 606562306a36Sopenharmony_ci goto reconnect; 606662306a36Sopenharmony_ci } 606762306a36Sopenharmony_ci 606862306a36Sopenharmony_ci connection->last_received = jiffies; 606962306a36Sopenharmony_ci 607062306a36Sopenharmony_ci if (cmd == &ack_receiver_tbl[P_PING_ACK]) { 607162306a36Sopenharmony_ci 
set_idle_timeout(connection); 607262306a36Sopenharmony_ci ping_timeout_active = false; 607362306a36Sopenharmony_ci } 607462306a36Sopenharmony_ci 607562306a36Sopenharmony_ci buf = connection->meta.rbuf; 607662306a36Sopenharmony_ci received = 0; 607762306a36Sopenharmony_ci expect = header_size; 607862306a36Sopenharmony_ci cmd = NULL; 607962306a36Sopenharmony_ci } 608062306a36Sopenharmony_ci } 608162306a36Sopenharmony_ci 608262306a36Sopenharmony_ci if (0) { 608362306a36Sopenharmony_cireconnect: 608462306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 608562306a36Sopenharmony_ci conn_md_sync(connection); 608662306a36Sopenharmony_ci } 608762306a36Sopenharmony_ci if (0) { 608862306a36Sopenharmony_cidisconnect: 608962306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 609062306a36Sopenharmony_ci } 609162306a36Sopenharmony_ci 609262306a36Sopenharmony_ci drbd_info(connection, "ack_receiver terminated\n"); 609362306a36Sopenharmony_ci 609462306a36Sopenharmony_ci return 0; 609562306a36Sopenharmony_ci} 609662306a36Sopenharmony_ci 609762306a36Sopenharmony_civoid drbd_send_acks_wf(struct work_struct *ws) 609862306a36Sopenharmony_ci{ 609962306a36Sopenharmony_ci struct drbd_peer_device *peer_device = 610062306a36Sopenharmony_ci container_of(ws, struct drbd_peer_device, send_acks_work); 610162306a36Sopenharmony_ci struct drbd_connection *connection = peer_device->connection; 610262306a36Sopenharmony_ci struct drbd_device *device = peer_device->device; 610362306a36Sopenharmony_ci struct net_conf *nc; 610462306a36Sopenharmony_ci int tcp_cork, err; 610562306a36Sopenharmony_ci 610662306a36Sopenharmony_ci rcu_read_lock(); 610762306a36Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 610862306a36Sopenharmony_ci tcp_cork = nc->tcp_cork; 610962306a36Sopenharmony_ci rcu_read_unlock(); 611062306a36Sopenharmony_ci 611162306a36Sopenharmony_ci if (tcp_cork) 611262306a36Sopenharmony_ci 
tcp_sock_set_cork(connection->meta.socket->sk, true); 611362306a36Sopenharmony_ci 611462306a36Sopenharmony_ci err = drbd_finish_peer_reqs(device); 611562306a36Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 611662306a36Sopenharmony_ci /* get is in drbd_endio_write_sec_final(). That is necessary to keep the 611762306a36Sopenharmony_ci struct work_struct send_acks_work alive, which is in the peer_device object */ 611862306a36Sopenharmony_ci 611962306a36Sopenharmony_ci if (err) { 612062306a36Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 612162306a36Sopenharmony_ci return; 612262306a36Sopenharmony_ci } 612362306a36Sopenharmony_ci 612462306a36Sopenharmony_ci if (tcp_cork) 612562306a36Sopenharmony_ci tcp_sock_set_cork(connection->meta.socket->sk, false); 612662306a36Sopenharmony_ci 612762306a36Sopenharmony_ci return; 612862306a36Sopenharmony_ci} 6129