// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */


#include <linux/module.h>

#include <linux/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched/signal.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"
418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES) 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_cistruct packet_info { 458c2ecf20Sopenharmony_ci enum drbd_packet cmd; 468c2ecf20Sopenharmony_ci unsigned int size; 478c2ecf20Sopenharmony_ci unsigned int vnr; 488c2ecf20Sopenharmony_ci void *data; 498c2ecf20Sopenharmony_ci}; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cienum finish_epoch { 528c2ecf20Sopenharmony_ci FE_STILL_LIVE, 538c2ecf20Sopenharmony_ci FE_DESTROYED, 548c2ecf20Sopenharmony_ci FE_RECYCLED, 558c2ecf20Sopenharmony_ci}; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_cistatic int drbd_do_features(struct drbd_connection *connection); 588c2ecf20Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection); 598c2ecf20Sopenharmony_cistatic int drbd_disconnected(struct drbd_peer_device *); 608c2ecf20Sopenharmony_cistatic void conn_wait_active_ee_empty(struct drbd_connection *connection); 618c2ecf20Sopenharmony_cistatic enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); 628c2ecf20Sopenharmony_cistatic int e_end_block(struct drbd_work *, int); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci/* 688c2ecf20Sopenharmony_ci * some helper functions to deal with single linked page lists, 698c2ecf20Sopenharmony_ci * page->private being our "next" pointer. 708c2ecf20Sopenharmony_ci */ 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci/* If at least n pages are linked at head, get n pages off. 738c2ecf20Sopenharmony_ci * Otherwise, don't modify head, and return NULL. 748c2ecf20Sopenharmony_ci * Locking is the responsibility of the caller. 
758c2ecf20Sopenharmony_ci */ 768c2ecf20Sopenharmony_cistatic struct page *page_chain_del(struct page **head, int n) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci struct page *page; 798c2ecf20Sopenharmony_ci struct page *tmp; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci BUG_ON(!n); 828c2ecf20Sopenharmony_ci BUG_ON(!head); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci page = *head; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci if (!page) 878c2ecf20Sopenharmony_ci return NULL; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci while (page) { 908c2ecf20Sopenharmony_ci tmp = page_chain_next(page); 918c2ecf20Sopenharmony_ci if (--n == 0) 928c2ecf20Sopenharmony_ci break; /* found sufficient pages */ 938c2ecf20Sopenharmony_ci if (tmp == NULL) 948c2ecf20Sopenharmony_ci /* insufficient pages, don't use any of them. */ 958c2ecf20Sopenharmony_ci return NULL; 968c2ecf20Sopenharmony_ci page = tmp; 978c2ecf20Sopenharmony_ci } 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci /* add end of list marker for the returned list */ 1008c2ecf20Sopenharmony_ci set_page_private(page, 0); 1018c2ecf20Sopenharmony_ci /* actual return value, and adjustment of head */ 1028c2ecf20Sopenharmony_ci page = *head; 1038c2ecf20Sopenharmony_ci *head = tmp; 1048c2ecf20Sopenharmony_ci return page; 1058c2ecf20Sopenharmony_ci} 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci/* may be used outside of locks to find the tail of a (usually short) 1088c2ecf20Sopenharmony_ci * "private" page chain, before adding it back to a global chain head 1098c2ecf20Sopenharmony_ci * with page_chain_add() under a spinlock. 
*/ 1108c2ecf20Sopenharmony_cistatic struct page *page_chain_tail(struct page *page, int *len) 1118c2ecf20Sopenharmony_ci{ 1128c2ecf20Sopenharmony_ci struct page *tmp; 1138c2ecf20Sopenharmony_ci int i = 1; 1148c2ecf20Sopenharmony_ci while ((tmp = page_chain_next(page))) 1158c2ecf20Sopenharmony_ci ++i, page = tmp; 1168c2ecf20Sopenharmony_ci if (len) 1178c2ecf20Sopenharmony_ci *len = i; 1188c2ecf20Sopenharmony_ci return page; 1198c2ecf20Sopenharmony_ci} 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_cistatic int page_chain_free(struct page *page) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci struct page *tmp; 1248c2ecf20Sopenharmony_ci int i = 0; 1258c2ecf20Sopenharmony_ci page_chain_for_each_safe(page, tmp) { 1268c2ecf20Sopenharmony_ci put_page(page); 1278c2ecf20Sopenharmony_ci ++i; 1288c2ecf20Sopenharmony_ci } 1298c2ecf20Sopenharmony_ci return i; 1308c2ecf20Sopenharmony_ci} 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_cistatic void page_chain_add(struct page **head, 1338c2ecf20Sopenharmony_ci struct page *chain_first, struct page *chain_last) 1348c2ecf20Sopenharmony_ci{ 1358c2ecf20Sopenharmony_ci#if 1 1368c2ecf20Sopenharmony_ci struct page *tmp; 1378c2ecf20Sopenharmony_ci tmp = page_chain_tail(chain_first, NULL); 1388c2ecf20Sopenharmony_ci BUG_ON(tmp != chain_last); 1398c2ecf20Sopenharmony_ci#endif 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci /* add chain to head */ 1428c2ecf20Sopenharmony_ci set_page_private(chain_last, (unsigned long)*head); 1438c2ecf20Sopenharmony_ci *head = chain_first; 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_cistatic struct page *__drbd_alloc_pages(struct drbd_device *device, 1478c2ecf20Sopenharmony_ci unsigned int number) 1488c2ecf20Sopenharmony_ci{ 1498c2ecf20Sopenharmony_ci struct page *page = NULL; 1508c2ecf20Sopenharmony_ci struct page *tmp = NULL; 1518c2ecf20Sopenharmony_ci unsigned int i = 0; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* Yes, testing 
drbd_pp_vacant outside the lock is racy. 1548c2ecf20Sopenharmony_ci * So what. It saves a spin_lock. */ 1558c2ecf20Sopenharmony_ci if (drbd_pp_vacant >= number) { 1568c2ecf20Sopenharmony_ci spin_lock(&drbd_pp_lock); 1578c2ecf20Sopenharmony_ci page = page_chain_del(&drbd_pp_pool, number); 1588c2ecf20Sopenharmony_ci if (page) 1598c2ecf20Sopenharmony_ci drbd_pp_vacant -= number; 1608c2ecf20Sopenharmony_ci spin_unlock(&drbd_pp_lock); 1618c2ecf20Sopenharmony_ci if (page) 1628c2ecf20Sopenharmony_ci return page; 1638c2ecf20Sopenharmony_ci } 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD 1668c2ecf20Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 1678c2ecf20Sopenharmony_ci * which in turn might block on the other node at this very place. */ 1688c2ecf20Sopenharmony_ci for (i = 0; i < number; i++) { 1698c2ecf20Sopenharmony_ci tmp = alloc_page(GFP_TRY); 1708c2ecf20Sopenharmony_ci if (!tmp) 1718c2ecf20Sopenharmony_ci break; 1728c2ecf20Sopenharmony_ci set_page_private(tmp, (unsigned long)page); 1738c2ecf20Sopenharmony_ci page = tmp; 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci if (i == number) 1778c2ecf20Sopenharmony_ci return page; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci /* Not enough pages immediately available this time. 1808c2ecf20Sopenharmony_ci * No need to jump around here, drbd_alloc_pages will retry this 1818c2ecf20Sopenharmony_ci * function "soon". 
*/ 1828c2ecf20Sopenharmony_ci if (page) { 1838c2ecf20Sopenharmony_ci tmp = page_chain_tail(page, NULL); 1848c2ecf20Sopenharmony_ci spin_lock(&drbd_pp_lock); 1858c2ecf20Sopenharmony_ci page_chain_add(&drbd_pp_pool, page, tmp); 1868c2ecf20Sopenharmony_ci drbd_pp_vacant += i; 1878c2ecf20Sopenharmony_ci spin_unlock(&drbd_pp_lock); 1888c2ecf20Sopenharmony_ci } 1898c2ecf20Sopenharmony_ci return NULL; 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_cistatic void reclaim_finished_net_peer_reqs(struct drbd_device *device, 1938c2ecf20Sopenharmony_ci struct list_head *to_be_freed) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req, *tmp; 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci /* The EEs are always appended to the end of the list. Since 1988c2ecf20Sopenharmony_ci they are sent in order over the wire, they have to finish 1998c2ecf20Sopenharmony_ci in order. As soon as we see the first not finished we can 2008c2ecf20Sopenharmony_ci stop to examine the list... 
*/ 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { 2038c2ecf20Sopenharmony_ci if (drbd_peer_req_has_active_page(peer_req)) 2048c2ecf20Sopenharmony_ci break; 2058c2ecf20Sopenharmony_ci list_move(&peer_req->w.list, to_be_freed); 2068c2ecf20Sopenharmony_ci } 2078c2ecf20Sopenharmony_ci} 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_cistatic void drbd_reclaim_net_peer_reqs(struct drbd_device *device) 2108c2ecf20Sopenharmony_ci{ 2118c2ecf20Sopenharmony_ci LIST_HEAD(reclaimed); 2128c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 2158c2ecf20Sopenharmony_ci reclaim_finished_net_peer_reqs(device, &reclaimed); 2168c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 2178c2ecf20Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 2188c2ecf20Sopenharmony_ci drbd_free_net_peer_req(device, peer_req); 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_cistatic void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) 2228c2ecf20Sopenharmony_ci{ 2238c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 2248c2ecf20Sopenharmony_ci int vnr; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci rcu_read_lock(); 2278c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2288c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 2298c2ecf20Sopenharmony_ci if (!atomic_read(&device->pp_in_use_by_net)) 2308c2ecf20Sopenharmony_ci continue; 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci kref_get(&device->kref); 2338c2ecf20Sopenharmony_ci rcu_read_unlock(); 2348c2ecf20Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 2358c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 2368c2ecf20Sopenharmony_ci rcu_read_lock(); 2378c2ecf20Sopenharmony_ci } 
2388c2ecf20Sopenharmony_ci rcu_read_unlock(); 2398c2ecf20Sopenharmony_ci} 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci/** 2428c2ecf20Sopenharmony_ci * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) 2438c2ecf20Sopenharmony_ci * @device: DRBD device. 2448c2ecf20Sopenharmony_ci * @number: number of pages requested 2458c2ecf20Sopenharmony_ci * @retry: whether to retry, if not enough pages are available right now 2468c2ecf20Sopenharmony_ci * 2478c2ecf20Sopenharmony_ci * Tries to allocate number pages, first from our own page pool, then from 2488c2ecf20Sopenharmony_ci * the kernel. 2498c2ecf20Sopenharmony_ci * Possibly retry until DRBD frees sufficient pages somewhere else. 2508c2ecf20Sopenharmony_ci * 2518c2ecf20Sopenharmony_ci * If this allocation would exceed the max_buffers setting, we throttle 2528c2ecf20Sopenharmony_ci * allocation (schedule_timeout) to give the system some room to breathe. 2538c2ecf20Sopenharmony_ci * 2548c2ecf20Sopenharmony_ci * We do not use max-buffers as hard limit, because it could lead to 2558c2ecf20Sopenharmony_ci * congestion and further to a distributed deadlock during online-verify or 2568c2ecf20Sopenharmony_ci * (checksum based) resync, if the max-buffers, socket buffer sizes and 2578c2ecf20Sopenharmony_ci * resync-rate settings are mis-configured. 2588c2ecf20Sopenharmony_ci * 2598c2ecf20Sopenharmony_ci * Returns a page chain linked via page->private. 
2608c2ecf20Sopenharmony_ci */ 2618c2ecf20Sopenharmony_cistruct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, 2628c2ecf20Sopenharmony_ci bool retry) 2638c2ecf20Sopenharmony_ci{ 2648c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 2658c2ecf20Sopenharmony_ci struct page *page = NULL; 2668c2ecf20Sopenharmony_ci struct net_conf *nc; 2678c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 2688c2ecf20Sopenharmony_ci unsigned int mxb; 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci rcu_read_lock(); 2718c2ecf20Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 2728c2ecf20Sopenharmony_ci mxb = nc ? nc->max_buffers : 1000000; 2738c2ecf20Sopenharmony_ci rcu_read_unlock(); 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci if (atomic_read(&device->pp_in_use) < mxb) 2768c2ecf20Sopenharmony_ci page = __drbd_alloc_pages(device, number); 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci /* Try to keep the fast path fast, but occasionally we need 2798c2ecf20Sopenharmony_ci * to reclaim the pages we lended to the network stack. 
*/ 2808c2ecf20Sopenharmony_ci if (page && atomic_read(&device->pp_in_use_by_net) > 512) 2818c2ecf20Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci while (page == NULL) { 2848c2ecf20Sopenharmony_ci prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci drbd_reclaim_net_peer_reqs(device); 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci if (atomic_read(&device->pp_in_use) < mxb) { 2898c2ecf20Sopenharmony_ci page = __drbd_alloc_pages(device, number); 2908c2ecf20Sopenharmony_ci if (page) 2918c2ecf20Sopenharmony_ci break; 2928c2ecf20Sopenharmony_ci } 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci if (!retry) 2958c2ecf20Sopenharmony_ci break; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci if (signal_pending(current)) { 2988c2ecf20Sopenharmony_ci drbd_warn(device, "drbd_alloc_pages interrupted!\n"); 2998c2ecf20Sopenharmony_ci break; 3008c2ecf20Sopenharmony_ci } 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci if (schedule_timeout(HZ/10) == 0) 3038c2ecf20Sopenharmony_ci mxb = UINT_MAX; 3048c2ecf20Sopenharmony_ci } 3058c2ecf20Sopenharmony_ci finish_wait(&drbd_pp_wait, &wait); 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci if (page) 3088c2ecf20Sopenharmony_ci atomic_add(number, &device->pp_in_use); 3098c2ecf20Sopenharmony_ci return page; 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. 3138c2ecf20Sopenharmony_ci * Is also used from inside an other spin_lock_irq(&resource->req_lock); 3148c2ecf20Sopenharmony_ci * Either links the page chain back to the global pool, 3158c2ecf20Sopenharmony_ci * or returns all pages to the system. */ 3168c2ecf20Sopenharmony_cistatic void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci atomic_t *a = is_net ? 
&device->pp_in_use_by_net : &device->pp_in_use; 3198c2ecf20Sopenharmony_ci int i; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci if (page == NULL) 3228c2ecf20Sopenharmony_ci return; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) 3258c2ecf20Sopenharmony_ci i = page_chain_free(page); 3268c2ecf20Sopenharmony_ci else { 3278c2ecf20Sopenharmony_ci struct page *tmp; 3288c2ecf20Sopenharmony_ci tmp = page_chain_tail(page, &i); 3298c2ecf20Sopenharmony_ci spin_lock(&drbd_pp_lock); 3308c2ecf20Sopenharmony_ci page_chain_add(&drbd_pp_pool, page, tmp); 3318c2ecf20Sopenharmony_ci drbd_pp_vacant += i; 3328c2ecf20Sopenharmony_ci spin_unlock(&drbd_pp_lock); 3338c2ecf20Sopenharmony_ci } 3348c2ecf20Sopenharmony_ci i = atomic_sub_return(i, a); 3358c2ecf20Sopenharmony_ci if (i < 0) 3368c2ecf20Sopenharmony_ci drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", 3378c2ecf20Sopenharmony_ci is_net ? "pp_in_use_by_net" : "pp_in_use", i); 3388c2ecf20Sopenharmony_ci wake_up(&drbd_pp_wait); 3398c2ecf20Sopenharmony_ci} 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci/* 3428c2ecf20Sopenharmony_ciYou need to hold the req_lock: 3438c2ecf20Sopenharmony_ci _drbd_wait_ee_list_empty() 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ciYou must not have the req_lock: 3468c2ecf20Sopenharmony_ci drbd_free_peer_req() 3478c2ecf20Sopenharmony_ci drbd_alloc_peer_req() 3488c2ecf20Sopenharmony_ci drbd_free_peer_reqs() 3498c2ecf20Sopenharmony_ci drbd_ee_fix_bhs() 3508c2ecf20Sopenharmony_ci drbd_finish_peer_reqs() 3518c2ecf20Sopenharmony_ci drbd_clear_done_ee() 3528c2ecf20Sopenharmony_ci drbd_wait_ee_list_empty() 3538c2ecf20Sopenharmony_ci*/ 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci/* normal: payload_size == request size (bi_size) 3568c2ecf20Sopenharmony_ci * w_same: payload_size == logical_block_size 3578c2ecf20Sopenharmony_ci * trim: payload_size == 0 */ 3588c2ecf20Sopenharmony_cistruct drbd_peer_request * 
3598c2ecf20Sopenharmony_cidrbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 3608c2ecf20Sopenharmony_ci unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local) 3618c2ecf20Sopenharmony_ci{ 3628c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 3638c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 3648c2ecf20Sopenharmony_ci struct page *page = NULL; 3658c2ecf20Sopenharmony_ci unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT; 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) 3688c2ecf20Sopenharmony_ci return NULL; 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 3718c2ecf20Sopenharmony_ci if (!peer_req) { 3728c2ecf20Sopenharmony_ci if (!(gfp_mask & __GFP_NOWARN)) 3738c2ecf20Sopenharmony_ci drbd_err(device, "%s: allocation failed\n", __func__); 3748c2ecf20Sopenharmony_ci return NULL; 3758c2ecf20Sopenharmony_ci } 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci if (nr_pages) { 3788c2ecf20Sopenharmony_ci page = drbd_alloc_pages(peer_device, nr_pages, 3798c2ecf20Sopenharmony_ci gfpflags_allow_blocking(gfp_mask)); 3808c2ecf20Sopenharmony_ci if (!page) 3818c2ecf20Sopenharmony_ci goto fail; 3828c2ecf20Sopenharmony_ci } 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci memset(peer_req, 0, sizeof(*peer_req)); 3858c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&peer_req->w.list); 3868c2ecf20Sopenharmony_ci drbd_clear_interval(&peer_req->i); 3878c2ecf20Sopenharmony_ci peer_req->i.size = request_size; 3888c2ecf20Sopenharmony_ci peer_req->i.sector = sector; 3898c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 3908c2ecf20Sopenharmony_ci peer_req->peer_device = peer_device; 3918c2ecf20Sopenharmony_ci peer_req->pages = page; 3928c2ecf20Sopenharmony_ci /* 3938c2ecf20Sopenharmony_ci * The block_id is opaque to the receiver. 
It is not endianness 3948c2ecf20Sopenharmony_ci * converted, and sent back to the sender unchanged. 3958c2ecf20Sopenharmony_ci */ 3968c2ecf20Sopenharmony_ci peer_req->block_id = id; 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci return peer_req; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci fail: 4018c2ecf20Sopenharmony_ci mempool_free(peer_req, &drbd_ee_mempool); 4028c2ecf20Sopenharmony_ci return NULL; 4038c2ecf20Sopenharmony_ci} 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_civoid __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 4068c2ecf20Sopenharmony_ci int is_net) 4078c2ecf20Sopenharmony_ci{ 4088c2ecf20Sopenharmony_ci might_sleep(); 4098c2ecf20Sopenharmony_ci if (peer_req->flags & EE_HAS_DIGEST) 4108c2ecf20Sopenharmony_ci kfree(peer_req->digest); 4118c2ecf20Sopenharmony_ci drbd_free_pages(device, peer_req->pages, is_net); 4128c2ecf20Sopenharmony_ci D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 4138c2ecf20Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 4148c2ecf20Sopenharmony_ci if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { 4158c2ecf20Sopenharmony_ci peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 4168c2ecf20Sopenharmony_ci drbd_al_complete_io(device, &peer_req->i); 4178c2ecf20Sopenharmony_ci } 4188c2ecf20Sopenharmony_ci mempool_free(peer_req, &drbd_ee_mempool); 4198c2ecf20Sopenharmony_ci} 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_ciint drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) 4228c2ecf20Sopenharmony_ci{ 4238c2ecf20Sopenharmony_ci LIST_HEAD(work_list); 4248c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 4258c2ecf20Sopenharmony_ci int count = 0; 4268c2ecf20Sopenharmony_ci int is_net = list == &device->net_ee; 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4298c2ecf20Sopenharmony_ci list_splice_init(list, &work_list); 4308c2ecf20Sopenharmony_ci 
spin_unlock_irq(&device->resource->req_lock); 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 4338c2ecf20Sopenharmony_ci __drbd_free_peer_req(device, peer_req, is_net); 4348c2ecf20Sopenharmony_ci count++; 4358c2ecf20Sopenharmony_ci } 4368c2ecf20Sopenharmony_ci return count; 4378c2ecf20Sopenharmony_ci} 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci/* 4408c2ecf20Sopenharmony_ci * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 4418c2ecf20Sopenharmony_ci */ 4428c2ecf20Sopenharmony_cistatic int drbd_finish_peer_reqs(struct drbd_device *device) 4438c2ecf20Sopenharmony_ci{ 4448c2ecf20Sopenharmony_ci LIST_HEAD(work_list); 4458c2ecf20Sopenharmony_ci LIST_HEAD(reclaimed); 4468c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req, *t; 4478c2ecf20Sopenharmony_ci int err = 0; 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4508c2ecf20Sopenharmony_ci reclaim_finished_net_peer_reqs(device, &reclaimed); 4518c2ecf20Sopenharmony_ci list_splice_init(&device->done_ee, &work_list); 4528c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 4558c2ecf20Sopenharmony_ci drbd_free_net_peer_req(device, peer_req); 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci /* possible callbacks here: 4588c2ecf20Sopenharmony_ci * e_end_block, and e_end_resync_block, e_send_superseded. 4598c2ecf20Sopenharmony_ci * all ignore the last argument. 
4608c2ecf20Sopenharmony_ci */ 4618c2ecf20Sopenharmony_ci list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 4628c2ecf20Sopenharmony_ci int err2; 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci /* list_del not necessary, next/prev members not touched */ 4658c2ecf20Sopenharmony_ci err2 = peer_req->w.cb(&peer_req->w, !!err); 4668c2ecf20Sopenharmony_ci if (!err) 4678c2ecf20Sopenharmony_ci err = err2; 4688c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 4698c2ecf20Sopenharmony_ci } 4708c2ecf20Sopenharmony_ci wake_up(&device->ee_wait); 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci return err; 4738c2ecf20Sopenharmony_ci} 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_cistatic void _drbd_wait_ee_list_empty(struct drbd_device *device, 4768c2ecf20Sopenharmony_ci struct list_head *head) 4778c2ecf20Sopenharmony_ci{ 4788c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci /* avoids spin_lock/unlock 4818c2ecf20Sopenharmony_ci * and calling prepare_to_wait in the fast path */ 4828c2ecf20Sopenharmony_ci while (!list_empty(head)) { 4838c2ecf20Sopenharmony_ci prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 4848c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 4858c2ecf20Sopenharmony_ci io_schedule(); 4868c2ecf20Sopenharmony_ci finish_wait(&device->ee_wait, &wait); 4878c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4888c2ecf20Sopenharmony_ci } 4898c2ecf20Sopenharmony_ci} 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_cistatic void drbd_wait_ee_list_empty(struct drbd_device *device, 4928c2ecf20Sopenharmony_ci struct list_head *head) 4938c2ecf20Sopenharmony_ci{ 4948c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 4958c2ecf20Sopenharmony_ci _drbd_wait_ee_list_empty(device, head); 4968c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 4978c2ecf20Sopenharmony_ci} 4988c2ecf20Sopenharmony_ci 
4998c2ecf20Sopenharmony_cistatic int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 5008c2ecf20Sopenharmony_ci{ 5018c2ecf20Sopenharmony_ci struct kvec iov = { 5028c2ecf20Sopenharmony_ci .iov_base = buf, 5038c2ecf20Sopenharmony_ci .iov_len = size, 5048c2ecf20Sopenharmony_ci }; 5058c2ecf20Sopenharmony_ci struct msghdr msg = { 5068c2ecf20Sopenharmony_ci .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 5078c2ecf20Sopenharmony_ci }; 5088c2ecf20Sopenharmony_ci iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); 5098c2ecf20Sopenharmony_ci return sock_recvmsg(sock, &msg, msg.msg_flags); 5108c2ecf20Sopenharmony_ci} 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_cistatic int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) 5138c2ecf20Sopenharmony_ci{ 5148c2ecf20Sopenharmony_ci int rv; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci rv = drbd_recv_short(connection->data.socket, buf, size, 0); 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci if (rv < 0) { 5198c2ecf20Sopenharmony_ci if (rv == -ECONNRESET) 5208c2ecf20Sopenharmony_ci drbd_info(connection, "sock was reset by peer\n"); 5218c2ecf20Sopenharmony_ci else if (rv != -ERESTARTSYS) 5228c2ecf20Sopenharmony_ci drbd_err(connection, "sock_recvmsg returned %d\n", rv); 5238c2ecf20Sopenharmony_ci } else if (rv == 0) { 5248c2ecf20Sopenharmony_ci if (test_bit(DISCONNECT_SENT, &connection->flags)) { 5258c2ecf20Sopenharmony_ci long t; 5268c2ecf20Sopenharmony_ci rcu_read_lock(); 5278c2ecf20Sopenharmony_ci t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 5288c2ecf20Sopenharmony_ci rcu_read_unlock(); 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci if (t) 5338c2ecf20Sopenharmony_ci goto out; 5348c2ecf20Sopenharmony_ci } 5358c2ecf20Sopenharmony_ci drbd_info(connection, "sock was shut down by peer\n"); 
5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci if (rv != size) 5398c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ciout: 5428c2ecf20Sopenharmony_ci return rv; 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_cistatic int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) 5468c2ecf20Sopenharmony_ci{ 5478c2ecf20Sopenharmony_ci int err; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci err = drbd_recv(connection, buf, size); 5508c2ecf20Sopenharmony_ci if (err != size) { 5518c2ecf20Sopenharmony_ci if (err >= 0) 5528c2ecf20Sopenharmony_ci err = -EIO; 5538c2ecf20Sopenharmony_ci } else 5548c2ecf20Sopenharmony_ci err = 0; 5558c2ecf20Sopenharmony_ci return err; 5568c2ecf20Sopenharmony_ci} 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_cistatic int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) 5598c2ecf20Sopenharmony_ci{ 5608c2ecf20Sopenharmony_ci int err; 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci err = drbd_recv_all(connection, buf, size); 5638c2ecf20Sopenharmony_ci if (err && !signal_pending(current)) 5648c2ecf20Sopenharmony_ci drbd_warn(connection, "short read (expected size %d)\n", (int)size); 5658c2ecf20Sopenharmony_ci return err; 5668c2ecf20Sopenharmony_ci} 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci/* quoting tcp(7): 5698c2ecf20Sopenharmony_ci * On individual connections, the socket buffer size must be set prior to the 5708c2ecf20Sopenharmony_ci * listen(2) or connect(2) calls in order to have it take effect. 5718c2ecf20Sopenharmony_ci * This is our wrapper to do so. 
5728c2ecf20Sopenharmony_ci */ 5738c2ecf20Sopenharmony_cistatic void drbd_setbufsize(struct socket *sock, unsigned int snd, 5748c2ecf20Sopenharmony_ci unsigned int rcv) 5758c2ecf20Sopenharmony_ci{ 5768c2ecf20Sopenharmony_ci /* open coded SO_SNDBUF, SO_RCVBUF */ 5778c2ecf20Sopenharmony_ci if (snd) { 5788c2ecf20Sopenharmony_ci sock->sk->sk_sndbuf = snd; 5798c2ecf20Sopenharmony_ci sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5808c2ecf20Sopenharmony_ci } 5818c2ecf20Sopenharmony_ci if (rcv) { 5828c2ecf20Sopenharmony_ci sock->sk->sk_rcvbuf = rcv; 5838c2ecf20Sopenharmony_ci sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5848c2ecf20Sopenharmony_ci } 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistatic struct socket *drbd_try_connect(struct drbd_connection *connection) 5888c2ecf20Sopenharmony_ci{ 5898c2ecf20Sopenharmony_ci const char *what; 5908c2ecf20Sopenharmony_ci struct socket *sock; 5918c2ecf20Sopenharmony_ci struct sockaddr_in6 src_in6; 5928c2ecf20Sopenharmony_ci struct sockaddr_in6 peer_in6; 5938c2ecf20Sopenharmony_ci struct net_conf *nc; 5948c2ecf20Sopenharmony_ci int err, peer_addr_len, my_addr_len; 5958c2ecf20Sopenharmony_ci int sndbuf_size, rcvbuf_size, connect_int; 5968c2ecf20Sopenharmony_ci int disconnect_on_error = 1; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci rcu_read_lock(); 5998c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 6008c2ecf20Sopenharmony_ci if (!nc) { 6018c2ecf20Sopenharmony_ci rcu_read_unlock(); 6028c2ecf20Sopenharmony_ci return NULL; 6038c2ecf20Sopenharmony_ci } 6048c2ecf20Sopenharmony_ci sndbuf_size = nc->sndbuf_size; 6058c2ecf20Sopenharmony_ci rcvbuf_size = nc->rcvbuf_size; 6068c2ecf20Sopenharmony_ci connect_int = nc->connect_int; 6078c2ecf20Sopenharmony_ci rcu_read_unlock(); 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6)); 6108c2ecf20Sopenharmony_ci memcpy(&src_in6, &connection->my_addr, my_addr_len); 
6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) 6138c2ecf20Sopenharmony_ci src_in6.sin6_port = 0; 6148c2ecf20Sopenharmony_ci else 6158c2ecf20Sopenharmony_ci ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); 6188c2ecf20Sopenharmony_ci memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci what = "sock_create_kern"; 6218c2ecf20Sopenharmony_ci err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, 6228c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &sock); 6238c2ecf20Sopenharmony_ci if (err < 0) { 6248c2ecf20Sopenharmony_ci sock = NULL; 6258c2ecf20Sopenharmony_ci goto out; 6268c2ecf20Sopenharmony_ci } 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci sock->sk->sk_rcvtimeo = 6298c2ecf20Sopenharmony_ci sock->sk->sk_sndtimeo = connect_int * HZ; 6308c2ecf20Sopenharmony_ci drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci /* explicitly bind to the configured IP as source IP 6338c2ecf20Sopenharmony_ci * for the outgoing connections. 6348c2ecf20Sopenharmony_ci * This is needed for multihomed hosts and to be 6358c2ecf20Sopenharmony_ci * able to use lo: interfaces for drbd. 6368c2ecf20Sopenharmony_ci * Make sure to use 0 as port number, so linux selects 6378c2ecf20Sopenharmony_ci * a free one dynamically. 6388c2ecf20Sopenharmony_ci */ 6398c2ecf20Sopenharmony_ci what = "bind before connect"; 6408c2ecf20Sopenharmony_ci err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); 6418c2ecf20Sopenharmony_ci if (err < 0) 6428c2ecf20Sopenharmony_ci goto out; 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci /* connect may fail, peer not yet available. 
6458c2ecf20Sopenharmony_ci * stay C_WF_CONNECTION, don't go Disconnecting! */ 6468c2ecf20Sopenharmony_ci disconnect_on_error = 0; 6478c2ecf20Sopenharmony_ci what = "connect"; 6488c2ecf20Sopenharmony_ci err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ciout: 6518c2ecf20Sopenharmony_ci if (err < 0) { 6528c2ecf20Sopenharmony_ci if (sock) { 6538c2ecf20Sopenharmony_ci sock_release(sock); 6548c2ecf20Sopenharmony_ci sock = NULL; 6558c2ecf20Sopenharmony_ci } 6568c2ecf20Sopenharmony_ci switch (-err) { 6578c2ecf20Sopenharmony_ci /* timeout, busy, signal pending */ 6588c2ecf20Sopenharmony_ci case ETIMEDOUT: case EAGAIN: case EINPROGRESS: 6598c2ecf20Sopenharmony_ci case EINTR: case ERESTARTSYS: 6608c2ecf20Sopenharmony_ci /* peer not (yet) available, network problem */ 6618c2ecf20Sopenharmony_ci case ECONNREFUSED: case ENETUNREACH: 6628c2ecf20Sopenharmony_ci case EHOSTDOWN: case EHOSTUNREACH: 6638c2ecf20Sopenharmony_ci disconnect_on_error = 0; 6648c2ecf20Sopenharmony_ci break; 6658c2ecf20Sopenharmony_ci default: 6668c2ecf20Sopenharmony_ci drbd_err(connection, "%s failed, err = %d\n", what, err); 6678c2ecf20Sopenharmony_ci } 6688c2ecf20Sopenharmony_ci if (disconnect_on_error) 6698c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 6708c2ecf20Sopenharmony_ci } 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci return sock; 6738c2ecf20Sopenharmony_ci} 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_cistruct accept_wait_data { 6768c2ecf20Sopenharmony_ci struct drbd_connection *connection; 6778c2ecf20Sopenharmony_ci struct socket *s_listen; 6788c2ecf20Sopenharmony_ci struct completion door_bell; 6798c2ecf20Sopenharmony_ci void (*original_sk_state_change)(struct sock *sk); 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci}; 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_cistatic void drbd_incoming_connection(struct sock *sk) 
6848c2ecf20Sopenharmony_ci{ 6858c2ecf20Sopenharmony_ci struct accept_wait_data *ad = sk->sk_user_data; 6868c2ecf20Sopenharmony_ci void (*state_change)(struct sock *sk); 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci state_change = ad->original_sk_state_change; 6898c2ecf20Sopenharmony_ci if (sk->sk_state == TCP_ESTABLISHED) 6908c2ecf20Sopenharmony_ci complete(&ad->door_bell); 6918c2ecf20Sopenharmony_ci state_change(sk); 6928c2ecf20Sopenharmony_ci} 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_cistatic int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) 6958c2ecf20Sopenharmony_ci{ 6968c2ecf20Sopenharmony_ci int err, sndbuf_size, rcvbuf_size, my_addr_len; 6978c2ecf20Sopenharmony_ci struct sockaddr_in6 my_addr; 6988c2ecf20Sopenharmony_ci struct socket *s_listen; 6998c2ecf20Sopenharmony_ci struct net_conf *nc; 7008c2ecf20Sopenharmony_ci const char *what; 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_ci rcu_read_lock(); 7038c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 7048c2ecf20Sopenharmony_ci if (!nc) { 7058c2ecf20Sopenharmony_ci rcu_read_unlock(); 7068c2ecf20Sopenharmony_ci return -EIO; 7078c2ecf20Sopenharmony_ci } 7088c2ecf20Sopenharmony_ci sndbuf_size = nc->sndbuf_size; 7098c2ecf20Sopenharmony_ci rcvbuf_size = nc->rcvbuf_size; 7108c2ecf20Sopenharmony_ci rcu_read_unlock(); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); 7138c2ecf20Sopenharmony_ci memcpy(&my_addr, &connection->my_addr, my_addr_len); 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci what = "sock_create_kern"; 7168c2ecf20Sopenharmony_ci err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, 7178c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &s_listen); 7188c2ecf20Sopenharmony_ci if (err) { 7198c2ecf20Sopenharmony_ci s_listen = NULL; 7208c2ecf20Sopenharmony_ci goto out; 7218c2ecf20Sopenharmony_ci } 
7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 7248c2ecf20Sopenharmony_ci drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci what = "bind before listen"; 7278c2ecf20Sopenharmony_ci err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); 7288c2ecf20Sopenharmony_ci if (err < 0) 7298c2ecf20Sopenharmony_ci goto out; 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci ad->s_listen = s_listen; 7328c2ecf20Sopenharmony_ci write_lock_bh(&s_listen->sk->sk_callback_lock); 7338c2ecf20Sopenharmony_ci ad->original_sk_state_change = s_listen->sk->sk_state_change; 7348c2ecf20Sopenharmony_ci s_listen->sk->sk_state_change = drbd_incoming_connection; 7358c2ecf20Sopenharmony_ci s_listen->sk->sk_user_data = ad; 7368c2ecf20Sopenharmony_ci write_unlock_bh(&s_listen->sk->sk_callback_lock); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci what = "listen"; 7398c2ecf20Sopenharmony_ci err = s_listen->ops->listen(s_listen, 5); 7408c2ecf20Sopenharmony_ci if (err < 0) 7418c2ecf20Sopenharmony_ci goto out; 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci return 0; 7448c2ecf20Sopenharmony_ciout: 7458c2ecf20Sopenharmony_ci if (s_listen) 7468c2ecf20Sopenharmony_ci sock_release(s_listen); 7478c2ecf20Sopenharmony_ci if (err < 0) { 7488c2ecf20Sopenharmony_ci if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7498c2ecf20Sopenharmony_ci drbd_err(connection, "%s failed, err = %d\n", what, err); 7508c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 7518c2ecf20Sopenharmony_ci } 7528c2ecf20Sopenharmony_ci } 7538c2ecf20Sopenharmony_ci 7548c2ecf20Sopenharmony_ci return -EIO; 7558c2ecf20Sopenharmony_ci} 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_cistatic void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) 7588c2ecf20Sopenharmony_ci{ 7598c2ecf20Sopenharmony_ci 
write_lock_bh(&sk->sk_callback_lock); 7608c2ecf20Sopenharmony_ci sk->sk_state_change = ad->original_sk_state_change; 7618c2ecf20Sopenharmony_ci sk->sk_user_data = NULL; 7628c2ecf20Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 7638c2ecf20Sopenharmony_ci} 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_cistatic struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) 7668c2ecf20Sopenharmony_ci{ 7678c2ecf20Sopenharmony_ci int timeo, connect_int, err = 0; 7688c2ecf20Sopenharmony_ci struct socket *s_estab = NULL; 7698c2ecf20Sopenharmony_ci struct net_conf *nc; 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci rcu_read_lock(); 7728c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 7738c2ecf20Sopenharmony_ci if (!nc) { 7748c2ecf20Sopenharmony_ci rcu_read_unlock(); 7758c2ecf20Sopenharmony_ci return NULL; 7768c2ecf20Sopenharmony_ci } 7778c2ecf20Sopenharmony_ci connect_int = nc->connect_int; 7788c2ecf20Sopenharmony_ci rcu_read_unlock(); 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci timeo = connect_int * HZ; 7818c2ecf20Sopenharmony_ci /* 28.5% random jitter */ 7828c2ecf20Sopenharmony_ci timeo += (prandom_u32() & 1) ? 
timeo / 7 : -timeo / 7; 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); 7858c2ecf20Sopenharmony_ci if (err <= 0) 7868c2ecf20Sopenharmony_ci return NULL; 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci err = kernel_accept(ad->s_listen, &s_estab, 0); 7898c2ecf20Sopenharmony_ci if (err < 0) { 7908c2ecf20Sopenharmony_ci if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7918c2ecf20Sopenharmony_ci drbd_err(connection, "accept failed, err = %d\n", err); 7928c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 7938c2ecf20Sopenharmony_ci } 7948c2ecf20Sopenharmony_ci } 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci if (s_estab) 7978c2ecf20Sopenharmony_ci unregister_state_change(s_estab->sk, ad); 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci return s_estab; 8008c2ecf20Sopenharmony_ci} 8018c2ecf20Sopenharmony_ci 8028c2ecf20Sopenharmony_cistatic int decode_header(struct drbd_connection *, void *, struct packet_info *); 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_cistatic int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, 8058c2ecf20Sopenharmony_ci enum drbd_packet cmd) 8068c2ecf20Sopenharmony_ci{ 8078c2ecf20Sopenharmony_ci if (!conn_prepare_command(connection, sock)) 8088c2ecf20Sopenharmony_ci return -EIO; 8098c2ecf20Sopenharmony_ci return conn_send_command(connection, sock, cmd, 0, NULL, 0); 8108c2ecf20Sopenharmony_ci} 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_cistatic int receive_first_packet(struct drbd_connection *connection, struct socket *sock) 8138c2ecf20Sopenharmony_ci{ 8148c2ecf20Sopenharmony_ci unsigned int header_size = drbd_header_size(connection); 8158c2ecf20Sopenharmony_ci struct packet_info pi; 8168c2ecf20Sopenharmony_ci struct net_conf *nc; 8178c2ecf20Sopenharmony_ci int err; 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci rcu_read_lock(); 
8208c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 8218c2ecf20Sopenharmony_ci if (!nc) { 8228c2ecf20Sopenharmony_ci rcu_read_unlock(); 8238c2ecf20Sopenharmony_ci return -EIO; 8248c2ecf20Sopenharmony_ci } 8258c2ecf20Sopenharmony_ci sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; 8268c2ecf20Sopenharmony_ci rcu_read_unlock(); 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 8298c2ecf20Sopenharmony_ci if (err != header_size) { 8308c2ecf20Sopenharmony_ci if (err >= 0) 8318c2ecf20Sopenharmony_ci err = -EIO; 8328c2ecf20Sopenharmony_ci return err; 8338c2ecf20Sopenharmony_ci } 8348c2ecf20Sopenharmony_ci err = decode_header(connection, connection->data.rbuf, &pi); 8358c2ecf20Sopenharmony_ci if (err) 8368c2ecf20Sopenharmony_ci return err; 8378c2ecf20Sopenharmony_ci return pi.cmd; 8388c2ecf20Sopenharmony_ci} 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci/** 8418c2ecf20Sopenharmony_ci * drbd_socket_okay() - Free the socket if its connection is not okay 8428c2ecf20Sopenharmony_ci * @sock: pointer to the pointer to the socket. 
8438c2ecf20Sopenharmony_ci */ 8448c2ecf20Sopenharmony_cistatic bool drbd_socket_okay(struct socket **sock) 8458c2ecf20Sopenharmony_ci{ 8468c2ecf20Sopenharmony_ci int rr; 8478c2ecf20Sopenharmony_ci char tb[4]; 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci if (!*sock) 8508c2ecf20Sopenharmony_ci return false; 8518c2ecf20Sopenharmony_ci 8528c2ecf20Sopenharmony_ci rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci if (rr > 0 || rr == -EAGAIN) { 8558c2ecf20Sopenharmony_ci return true; 8568c2ecf20Sopenharmony_ci } else { 8578c2ecf20Sopenharmony_ci sock_release(*sock); 8588c2ecf20Sopenharmony_ci *sock = NULL; 8598c2ecf20Sopenharmony_ci return false; 8608c2ecf20Sopenharmony_ci } 8618c2ecf20Sopenharmony_ci} 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_cistatic bool connection_established(struct drbd_connection *connection, 8648c2ecf20Sopenharmony_ci struct socket **sock1, 8658c2ecf20Sopenharmony_ci struct socket **sock2) 8668c2ecf20Sopenharmony_ci{ 8678c2ecf20Sopenharmony_ci struct net_conf *nc; 8688c2ecf20Sopenharmony_ci int timeout; 8698c2ecf20Sopenharmony_ci bool ok; 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_ci if (!*sock1 || !*sock2) 8728c2ecf20Sopenharmony_ci return false; 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci rcu_read_lock(); 8758c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 8768c2ecf20Sopenharmony_ci timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; 8778c2ecf20Sopenharmony_ci rcu_read_unlock(); 8788c2ecf20Sopenharmony_ci schedule_timeout_interruptible(timeout); 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci ok = drbd_socket_okay(sock1); 8818c2ecf20Sopenharmony_ci ok = drbd_socket_okay(sock2) && ok; 8828c2ecf20Sopenharmony_ci 8838c2ecf20Sopenharmony_ci return ok; 8848c2ecf20Sopenharmony_ci} 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci/* Gets called if a connection is established, or if a new minor gets created 
8878c2ecf20Sopenharmony_ci in a connection */ 8888c2ecf20Sopenharmony_ciint drbd_connected(struct drbd_peer_device *peer_device) 8898c2ecf20Sopenharmony_ci{ 8908c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 8918c2ecf20Sopenharmony_ci int err; 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci atomic_set(&device->packet_seq, 0); 8948c2ecf20Sopenharmony_ci device->peer_seq = 0; 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 8978c2ecf20Sopenharmony_ci &peer_device->connection->cstate_mutex : 8988c2ecf20Sopenharmony_ci &device->own_state_mutex; 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci err = drbd_send_sync_param(peer_device); 9018c2ecf20Sopenharmony_ci if (!err) 9028c2ecf20Sopenharmony_ci err = drbd_send_sizes(peer_device, 0, 0); 9038c2ecf20Sopenharmony_ci if (!err) 9048c2ecf20Sopenharmony_ci err = drbd_send_uuids(peer_device); 9058c2ecf20Sopenharmony_ci if (!err) 9068c2ecf20Sopenharmony_ci err = drbd_send_current_state(peer_device); 9078c2ecf20Sopenharmony_ci clear_bit(USE_DEGR_WFC_T, &device->flags); 9088c2ecf20Sopenharmony_ci clear_bit(RESIZE_PENDING, &device->flags); 9098c2ecf20Sopenharmony_ci atomic_set(&device->ap_in_flight, 0); 9108c2ecf20Sopenharmony_ci mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ 9118c2ecf20Sopenharmony_ci return err; 9128c2ecf20Sopenharmony_ci} 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_ci/* 9158c2ecf20Sopenharmony_ci * return values: 9168c2ecf20Sopenharmony_ci * 1 yes, we have a valid connection 9178c2ecf20Sopenharmony_ci * 0 oops, did not work out, please try again 9188c2ecf20Sopenharmony_ci * -1 peer talks different language, 9198c2ecf20Sopenharmony_ci * no point in trying again, please go standalone. 9208c2ecf20Sopenharmony_ci * -2 We do not have a network config... 
9218c2ecf20Sopenharmony_ci */ 9228c2ecf20Sopenharmony_cistatic int conn_connect(struct drbd_connection *connection) 9238c2ecf20Sopenharmony_ci{ 9248c2ecf20Sopenharmony_ci struct drbd_socket sock, msock; 9258c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 9268c2ecf20Sopenharmony_ci struct net_conf *nc; 9278c2ecf20Sopenharmony_ci int vnr, timeout, h; 9288c2ecf20Sopenharmony_ci bool discard_my_data, ok; 9298c2ecf20Sopenharmony_ci enum drbd_state_rv rv; 9308c2ecf20Sopenharmony_ci struct accept_wait_data ad = { 9318c2ecf20Sopenharmony_ci .connection = connection, 9328c2ecf20Sopenharmony_ci .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), 9338c2ecf20Sopenharmony_ci }; 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci clear_bit(DISCONNECT_SENT, &connection->flags); 9368c2ecf20Sopenharmony_ci if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) 9378c2ecf20Sopenharmony_ci return -2; 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci mutex_init(&sock.mutex); 9408c2ecf20Sopenharmony_ci sock.sbuf = connection->data.sbuf; 9418c2ecf20Sopenharmony_ci sock.rbuf = connection->data.rbuf; 9428c2ecf20Sopenharmony_ci sock.socket = NULL; 9438c2ecf20Sopenharmony_ci mutex_init(&msock.mutex); 9448c2ecf20Sopenharmony_ci msock.sbuf = connection->meta.sbuf; 9458c2ecf20Sopenharmony_ci msock.rbuf = connection->meta.rbuf; 9468c2ecf20Sopenharmony_ci msock.socket = NULL; 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci /* Assume that the peer only understands protocol 80 until we know better. 
*/ 9498c2ecf20Sopenharmony_ci connection->agreed_pro_version = 80; 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci if (prepare_listen_socket(connection, &ad)) 9528c2ecf20Sopenharmony_ci return 0; 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci do { 9558c2ecf20Sopenharmony_ci struct socket *s; 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci s = drbd_try_connect(connection); 9588c2ecf20Sopenharmony_ci if (s) { 9598c2ecf20Sopenharmony_ci if (!sock.socket) { 9608c2ecf20Sopenharmony_ci sock.socket = s; 9618c2ecf20Sopenharmony_ci send_first_packet(connection, &sock, P_INITIAL_DATA); 9628c2ecf20Sopenharmony_ci } else if (!msock.socket) { 9638c2ecf20Sopenharmony_ci clear_bit(RESOLVE_CONFLICTS, &connection->flags); 9648c2ecf20Sopenharmony_ci msock.socket = s; 9658c2ecf20Sopenharmony_ci send_first_packet(connection, &msock, P_INITIAL_META); 9668c2ecf20Sopenharmony_ci } else { 9678c2ecf20Sopenharmony_ci drbd_err(connection, "Logic error in conn_connect()\n"); 9688c2ecf20Sopenharmony_ci goto out_release_sockets; 9698c2ecf20Sopenharmony_ci } 9708c2ecf20Sopenharmony_ci } 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci if (connection_established(connection, &sock.socket, &msock.socket)) 9738c2ecf20Sopenharmony_ci break; 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_ciretry: 9768c2ecf20Sopenharmony_ci s = drbd_wait_for_connect(connection, &ad); 9778c2ecf20Sopenharmony_ci if (s) { 9788c2ecf20Sopenharmony_ci int fp = receive_first_packet(connection, s); 9798c2ecf20Sopenharmony_ci drbd_socket_okay(&sock.socket); 9808c2ecf20Sopenharmony_ci drbd_socket_okay(&msock.socket); 9818c2ecf20Sopenharmony_ci switch (fp) { 9828c2ecf20Sopenharmony_ci case P_INITIAL_DATA: 9838c2ecf20Sopenharmony_ci if (sock.socket) { 9848c2ecf20Sopenharmony_ci drbd_warn(connection, "initial packet S crossed\n"); 9858c2ecf20Sopenharmony_ci sock_release(sock.socket); 9868c2ecf20Sopenharmony_ci sock.socket = s; 9878c2ecf20Sopenharmony_ci goto randomize; 9888c2ecf20Sopenharmony_ci } 
9898c2ecf20Sopenharmony_ci sock.socket = s; 9908c2ecf20Sopenharmony_ci break; 9918c2ecf20Sopenharmony_ci case P_INITIAL_META: 9928c2ecf20Sopenharmony_ci set_bit(RESOLVE_CONFLICTS, &connection->flags); 9938c2ecf20Sopenharmony_ci if (msock.socket) { 9948c2ecf20Sopenharmony_ci drbd_warn(connection, "initial packet M crossed\n"); 9958c2ecf20Sopenharmony_ci sock_release(msock.socket); 9968c2ecf20Sopenharmony_ci msock.socket = s; 9978c2ecf20Sopenharmony_ci goto randomize; 9988c2ecf20Sopenharmony_ci } 9998c2ecf20Sopenharmony_ci msock.socket = s; 10008c2ecf20Sopenharmony_ci break; 10018c2ecf20Sopenharmony_ci default: 10028c2ecf20Sopenharmony_ci drbd_warn(connection, "Error receiving initial packet\n"); 10038c2ecf20Sopenharmony_ci sock_release(s); 10048c2ecf20Sopenharmony_cirandomize: 10058c2ecf20Sopenharmony_ci if (prandom_u32() & 1) 10068c2ecf20Sopenharmony_ci goto retry; 10078c2ecf20Sopenharmony_ci } 10088c2ecf20Sopenharmony_ci } 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci if (connection->cstate <= C_DISCONNECTING) 10118c2ecf20Sopenharmony_ci goto out_release_sockets; 10128c2ecf20Sopenharmony_ci if (signal_pending(current)) { 10138c2ecf20Sopenharmony_ci flush_signals(current); 10148c2ecf20Sopenharmony_ci smp_rmb(); 10158c2ecf20Sopenharmony_ci if (get_t_state(&connection->receiver) == EXITING) 10168c2ecf20Sopenharmony_ci goto out_release_sockets; 10178c2ecf20Sopenharmony_ci } 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_ci ok = connection_established(connection, &sock.socket, &msock.socket); 10208c2ecf20Sopenharmony_ci } while (!ok); 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci if (ad.s_listen) 10238c2ecf20Sopenharmony_ci sock_release(ad.s_listen); 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_ci sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 10268c2ecf20Sopenharmony_ci msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci sock.socket->sk->sk_allocation = 
GFP_NOIO; 10298c2ecf20Sopenharmony_ci msock.socket->sk->sk_allocation = GFP_NOIO; 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 10328c2ecf20Sopenharmony_ci msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; 10338c2ecf20Sopenharmony_ci 10348c2ecf20Sopenharmony_ci /* NOT YET ... 10358c2ecf20Sopenharmony_ci * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10; 10368c2ecf20Sopenharmony_ci * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 10378c2ecf20Sopenharmony_ci * first set it to the P_CONNECTION_FEATURES timeout, 10388c2ecf20Sopenharmony_ci * which we set to 4x the configured ping_timeout. */ 10398c2ecf20Sopenharmony_ci rcu_read_lock(); 10408c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 10418c2ecf20Sopenharmony_ci 10428c2ecf20Sopenharmony_ci sock.socket->sk->sk_sndtimeo = 10438c2ecf20Sopenharmony_ci sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; 10448c2ecf20Sopenharmony_ci 10458c2ecf20Sopenharmony_ci msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; 10468c2ecf20Sopenharmony_ci timeout = nc->timeout * HZ / 10; 10478c2ecf20Sopenharmony_ci discard_my_data = nc->discard_my_data; 10488c2ecf20Sopenharmony_ci rcu_read_unlock(); 10498c2ecf20Sopenharmony_ci 10508c2ecf20Sopenharmony_ci msock.socket->sk->sk_sndtimeo = timeout; 10518c2ecf20Sopenharmony_ci 10528c2ecf20Sopenharmony_ci /* we don't want delays. 
10538c2ecf20Sopenharmony_ci * we use TCP_CORK where appropriate, though */ 10548c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(sock.socket->sk); 10558c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(msock.socket->sk); 10568c2ecf20Sopenharmony_ci 10578c2ecf20Sopenharmony_ci connection->data.socket = sock.socket; 10588c2ecf20Sopenharmony_ci connection->meta.socket = msock.socket; 10598c2ecf20Sopenharmony_ci connection->last_received = jiffies; 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci h = drbd_do_features(connection); 10628c2ecf20Sopenharmony_ci if (h <= 0) 10638c2ecf20Sopenharmony_ci return h; 10648c2ecf20Sopenharmony_ci 10658c2ecf20Sopenharmony_ci if (connection->cram_hmac_tfm) { 10668c2ecf20Sopenharmony_ci /* drbd_request_state(device, NS(conn, WFAuth)); */ 10678c2ecf20Sopenharmony_ci switch (drbd_do_auth(connection)) { 10688c2ecf20Sopenharmony_ci case -1: 10698c2ecf20Sopenharmony_ci drbd_err(connection, "Authentication of peer failed\n"); 10708c2ecf20Sopenharmony_ci return -1; 10718c2ecf20Sopenharmony_ci case 0: 10728c2ecf20Sopenharmony_ci drbd_err(connection, "Authentication of peer failed, trying again.\n"); 10738c2ecf20Sopenharmony_ci return 0; 10748c2ecf20Sopenharmony_ci } 10758c2ecf20Sopenharmony_ci } 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci connection->data.socket->sk->sk_sndtimeo = timeout; 10788c2ecf20Sopenharmony_ci connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 10798c2ecf20Sopenharmony_ci 10808c2ecf20Sopenharmony_ci if (drbd_send_protocol(connection) == -EOPNOTSUPP) 10818c2ecf20Sopenharmony_ci return -1; 10828c2ecf20Sopenharmony_ci 10838c2ecf20Sopenharmony_ci /* Prevent a race between resync-handshake and 10848c2ecf20Sopenharmony_ci * being promoted to Primary. 
10858c2ecf20Sopenharmony_ci * 10868c2ecf20Sopenharmony_ci * Grab and release the state mutex, so we know that any current 10878c2ecf20Sopenharmony_ci * drbd_set_role() is finished, and any incoming drbd_set_role 10888c2ecf20Sopenharmony_ci * will see the STATE_SENT flag, and wait for it to be cleared. 10898c2ecf20Sopenharmony_ci */ 10908c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 10918c2ecf20Sopenharmony_ci mutex_lock(peer_device->device->state_mutex); 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci /* avoid a race with conn_request_state( C_DISCONNECTING ) */ 10948c2ecf20Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 10958c2ecf20Sopenharmony_ci set_bit(STATE_SENT, &connection->flags); 10968c2ecf20Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 10998c2ecf20Sopenharmony_ci mutex_unlock(peer_device->device->state_mutex); 11008c2ecf20Sopenharmony_ci 11018c2ecf20Sopenharmony_ci rcu_read_lock(); 11028c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 11038c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 11048c2ecf20Sopenharmony_ci kref_get(&device->kref); 11058c2ecf20Sopenharmony_ci rcu_read_unlock(); 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci if (discard_my_data) 11088c2ecf20Sopenharmony_ci set_bit(DISCARD_MY_DATA, &device->flags); 11098c2ecf20Sopenharmony_ci else 11108c2ecf20Sopenharmony_ci clear_bit(DISCARD_MY_DATA, &device->flags); 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ci drbd_connected(peer_device); 11138c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 11148c2ecf20Sopenharmony_ci rcu_read_lock(); 11158c2ecf20Sopenharmony_ci } 11168c2ecf20Sopenharmony_ci rcu_read_unlock(); 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci rv = 
conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); 11198c2ecf20Sopenharmony_ci if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) { 11208c2ecf20Sopenharmony_ci clear_bit(STATE_SENT, &connection->flags); 11218c2ecf20Sopenharmony_ci return 0; 11228c2ecf20Sopenharmony_ci } 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci drbd_thread_start(&connection->ack_receiver); 11258c2ecf20Sopenharmony_ci /* opencoded create_singlethread_workqueue(), 11268c2ecf20Sopenharmony_ci * to be able to use format string arguments */ 11278c2ecf20Sopenharmony_ci connection->ack_sender = 11288c2ecf20Sopenharmony_ci alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name); 11298c2ecf20Sopenharmony_ci if (!connection->ack_sender) { 11308c2ecf20Sopenharmony_ci drbd_err(connection, "Failed to create workqueue ack_sender\n"); 11318c2ecf20Sopenharmony_ci return 0; 11328c2ecf20Sopenharmony_ci } 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 11358c2ecf20Sopenharmony_ci /* The discard_my_data flag is a single-shot modifier to the next 11368c2ecf20Sopenharmony_ci * connection attempt, the handshake of which is now well underway. 11378c2ecf20Sopenharmony_ci * No need for rcu style copying of the whole struct 11388c2ecf20Sopenharmony_ci * just to clear a single value. 
*/ 11398c2ecf20Sopenharmony_ci connection->net_conf->discard_my_data = 0; 11408c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci return h; 11438c2ecf20Sopenharmony_ci 11448c2ecf20Sopenharmony_ciout_release_sockets: 11458c2ecf20Sopenharmony_ci if (ad.s_listen) 11468c2ecf20Sopenharmony_ci sock_release(ad.s_listen); 11478c2ecf20Sopenharmony_ci if (sock.socket) 11488c2ecf20Sopenharmony_ci sock_release(sock.socket); 11498c2ecf20Sopenharmony_ci if (msock.socket) 11508c2ecf20Sopenharmony_ci sock_release(msock.socket); 11518c2ecf20Sopenharmony_ci return -1; 11528c2ecf20Sopenharmony_ci} 11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_cistatic int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi) 11558c2ecf20Sopenharmony_ci{ 11568c2ecf20Sopenharmony_ci unsigned int header_size = drbd_header_size(connection); 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci if (header_size == sizeof(struct p_header100) && 11598c2ecf20Sopenharmony_ci *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { 11608c2ecf20Sopenharmony_ci struct p_header100 *h = header; 11618c2ecf20Sopenharmony_ci if (h->pad != 0) { 11628c2ecf20Sopenharmony_ci drbd_err(connection, "Header padding is not zero\n"); 11638c2ecf20Sopenharmony_ci return -EINVAL; 11648c2ecf20Sopenharmony_ci } 11658c2ecf20Sopenharmony_ci pi->vnr = be16_to_cpu(h->volume); 11668c2ecf20Sopenharmony_ci pi->cmd = be16_to_cpu(h->command); 11678c2ecf20Sopenharmony_ci pi->size = be32_to_cpu(h->length); 11688c2ecf20Sopenharmony_ci } else if (header_size == sizeof(struct p_header95) && 11698c2ecf20Sopenharmony_ci *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { 11708c2ecf20Sopenharmony_ci struct p_header95 *h = header; 11718c2ecf20Sopenharmony_ci pi->cmd = be16_to_cpu(h->command); 11728c2ecf20Sopenharmony_ci pi->size = be32_to_cpu(h->length); 11738c2ecf20Sopenharmony_ci pi->vnr = 0; 11748c2ecf20Sopenharmony_ci } else if 
(header_size == sizeof(struct p_header80) && 11758c2ecf20Sopenharmony_ci *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { 11768c2ecf20Sopenharmony_ci struct p_header80 *h = header; 11778c2ecf20Sopenharmony_ci pi->cmd = be16_to_cpu(h->command); 11788c2ecf20Sopenharmony_ci pi->size = be16_to_cpu(h->length); 11798c2ecf20Sopenharmony_ci pi->vnr = 0; 11808c2ecf20Sopenharmony_ci } else { 11818c2ecf20Sopenharmony_ci drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n", 11828c2ecf20Sopenharmony_ci be32_to_cpu(*(__be32 *)header), 11838c2ecf20Sopenharmony_ci connection->agreed_pro_version); 11848c2ecf20Sopenharmony_ci return -EINVAL; 11858c2ecf20Sopenharmony_ci } 11868c2ecf20Sopenharmony_ci pi->data = header + header_size; 11878c2ecf20Sopenharmony_ci return 0; 11888c2ecf20Sopenharmony_ci} 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_cistatic void drbd_unplug_all_devices(struct drbd_connection *connection) 11918c2ecf20Sopenharmony_ci{ 11928c2ecf20Sopenharmony_ci if (current->plug == &connection->receiver_plug) { 11938c2ecf20Sopenharmony_ci blk_finish_plug(&connection->receiver_plug); 11948c2ecf20Sopenharmony_ci blk_start_plug(&connection->receiver_plug); 11958c2ecf20Sopenharmony_ci } /* else: maybe just schedule() ?? 
*/ 11968c2ecf20Sopenharmony_ci} 11978c2ecf20Sopenharmony_ci 11988c2ecf20Sopenharmony_cistatic int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi) 11998c2ecf20Sopenharmony_ci{ 12008c2ecf20Sopenharmony_ci void *buffer = connection->data.rbuf; 12018c2ecf20Sopenharmony_ci int err; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection)); 12048c2ecf20Sopenharmony_ci if (err) 12058c2ecf20Sopenharmony_ci return err; 12068c2ecf20Sopenharmony_ci 12078c2ecf20Sopenharmony_ci err = decode_header(connection, buffer, pi); 12088c2ecf20Sopenharmony_ci connection->last_received = jiffies; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci return err; 12118c2ecf20Sopenharmony_ci} 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_cistatic int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi) 12148c2ecf20Sopenharmony_ci{ 12158c2ecf20Sopenharmony_ci void *buffer = connection->data.rbuf; 12168c2ecf20Sopenharmony_ci unsigned int size = drbd_header_size(connection); 12178c2ecf20Sopenharmony_ci int err; 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT); 12208c2ecf20Sopenharmony_ci if (err != size) { 12218c2ecf20Sopenharmony_ci /* If we have nothing in the receive buffer now, to reduce 12228c2ecf20Sopenharmony_ci * application latency, try to drain the backend queues as 12238c2ecf20Sopenharmony_ci * quickly as possible, and let remote TCP know what we have 12248c2ecf20Sopenharmony_ci * received so far. 
*/ 12258c2ecf20Sopenharmony_ci if (err == -EAGAIN) { 12268c2ecf20Sopenharmony_ci tcp_sock_set_quickack(connection->data.socket->sk, 2); 12278c2ecf20Sopenharmony_ci drbd_unplug_all_devices(connection); 12288c2ecf20Sopenharmony_ci } 12298c2ecf20Sopenharmony_ci if (err > 0) { 12308c2ecf20Sopenharmony_ci buffer += err; 12318c2ecf20Sopenharmony_ci size -= err; 12328c2ecf20Sopenharmony_ci } 12338c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(connection, buffer, size); 12348c2ecf20Sopenharmony_ci if (err) 12358c2ecf20Sopenharmony_ci return err; 12368c2ecf20Sopenharmony_ci } 12378c2ecf20Sopenharmony_ci 12388c2ecf20Sopenharmony_ci err = decode_header(connection, connection->data.rbuf, pi); 12398c2ecf20Sopenharmony_ci connection->last_received = jiffies; 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci return err; 12428c2ecf20Sopenharmony_ci} 12438c2ecf20Sopenharmony_ci/* This is blkdev_issue_flush, but asynchronous. 12448c2ecf20Sopenharmony_ci * We want to submit to all component volumes in parallel, 12458c2ecf20Sopenharmony_ci * then wait for all completions. 
12468c2ecf20Sopenharmony_ci */ 12478c2ecf20Sopenharmony_cistruct issue_flush_context { 12488c2ecf20Sopenharmony_ci atomic_t pending; 12498c2ecf20Sopenharmony_ci int error; 12508c2ecf20Sopenharmony_ci struct completion done; 12518c2ecf20Sopenharmony_ci}; 12528c2ecf20Sopenharmony_cistruct one_flush_context { 12538c2ecf20Sopenharmony_ci struct drbd_device *device; 12548c2ecf20Sopenharmony_ci struct issue_flush_context *ctx; 12558c2ecf20Sopenharmony_ci}; 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_cistatic void one_flush_endio(struct bio *bio) 12588c2ecf20Sopenharmony_ci{ 12598c2ecf20Sopenharmony_ci struct one_flush_context *octx = bio->bi_private; 12608c2ecf20Sopenharmony_ci struct drbd_device *device = octx->device; 12618c2ecf20Sopenharmony_ci struct issue_flush_context *ctx = octx->ctx; 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci if (bio->bi_status) { 12648c2ecf20Sopenharmony_ci ctx->error = blk_status_to_errno(bio->bi_status); 12658c2ecf20Sopenharmony_ci drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status); 12668c2ecf20Sopenharmony_ci } 12678c2ecf20Sopenharmony_ci kfree(octx); 12688c2ecf20Sopenharmony_ci bio_put(bio); 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_ci clear_bit(FLUSH_PENDING, &device->flags); 12718c2ecf20Sopenharmony_ci put_ldev(device); 12728c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&ctx->pending)) 12758c2ecf20Sopenharmony_ci complete(&ctx->done); 12768c2ecf20Sopenharmony_ci} 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_cistatic void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) 12798c2ecf20Sopenharmony_ci{ 12808c2ecf20Sopenharmony_ci struct bio *bio = bio_alloc(GFP_NOIO, 0); 12818c2ecf20Sopenharmony_ci struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); 12828c2ecf20Sopenharmony_ci if (!bio || !octx) { 12838c2ecf20Sopenharmony_ci 
drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n"); 12848c2ecf20Sopenharmony_ci /* FIXME: what else can I do now? disconnecting or detaching 12858c2ecf20Sopenharmony_ci * really does not help to improve the state of the world, either. 12868c2ecf20Sopenharmony_ci */ 12878c2ecf20Sopenharmony_ci kfree(octx); 12888c2ecf20Sopenharmony_ci if (bio) 12898c2ecf20Sopenharmony_ci bio_put(bio); 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci ctx->error = -ENOMEM; 12928c2ecf20Sopenharmony_ci put_ldev(device); 12938c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 12948c2ecf20Sopenharmony_ci return; 12958c2ecf20Sopenharmony_ci } 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci octx->device = device; 12988c2ecf20Sopenharmony_ci octx->ctx = ctx; 12998c2ecf20Sopenharmony_ci bio_set_dev(bio, device->ldev->backing_bdev); 13008c2ecf20Sopenharmony_ci bio->bi_private = octx; 13018c2ecf20Sopenharmony_ci bio->bi_end_io = one_flush_endio; 13028c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci device->flush_jif = jiffies; 13058c2ecf20Sopenharmony_ci set_bit(FLUSH_PENDING, &device->flags); 13068c2ecf20Sopenharmony_ci atomic_inc(&ctx->pending); 13078c2ecf20Sopenharmony_ci submit_bio(bio); 13088c2ecf20Sopenharmony_ci} 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_cistatic void drbd_flush(struct drbd_connection *connection) 13118c2ecf20Sopenharmony_ci{ 13128c2ecf20Sopenharmony_ci if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { 13138c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 13148c2ecf20Sopenharmony_ci struct issue_flush_context ctx; 13158c2ecf20Sopenharmony_ci int vnr; 13168c2ecf20Sopenharmony_ci 13178c2ecf20Sopenharmony_ci atomic_set(&ctx.pending, 1); 13188c2ecf20Sopenharmony_ci ctx.error = 0; 13198c2ecf20Sopenharmony_ci init_completion(&ctx.done); 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_ci rcu_read_lock(); 
13228c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 13238c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ci if (!get_ldev(device)) 13268c2ecf20Sopenharmony_ci continue; 13278c2ecf20Sopenharmony_ci kref_get(&device->kref); 13288c2ecf20Sopenharmony_ci rcu_read_unlock(); 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_ci submit_one_flush(device, &ctx); 13318c2ecf20Sopenharmony_ci 13328c2ecf20Sopenharmony_ci rcu_read_lock(); 13338c2ecf20Sopenharmony_ci } 13348c2ecf20Sopenharmony_ci rcu_read_unlock(); 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci /* Do we want to add a timeout, 13378c2ecf20Sopenharmony_ci * if disk-timeout is set? */ 13388c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&ctx.pending)) 13398c2ecf20Sopenharmony_ci wait_for_completion(&ctx.done); 13408c2ecf20Sopenharmony_ci 13418c2ecf20Sopenharmony_ci if (ctx.error) { 13428c2ecf20Sopenharmony_ci /* would rather check on EOPNOTSUPP, but that is not reliable. 13438c2ecf20Sopenharmony_ci * don't try again for ANY return value != 0 13448c2ecf20Sopenharmony_ci * if (rv == -EOPNOTSUPP) */ 13458c2ecf20Sopenharmony_ci /* Any error is already reported by bio_endio callback. */ 13468c2ecf20Sopenharmony_ci drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); 13478c2ecf20Sopenharmony_ci } 13488c2ecf20Sopenharmony_ci } 13498c2ecf20Sopenharmony_ci} 13508c2ecf20Sopenharmony_ci 13518c2ecf20Sopenharmony_ci/** 13528c2ecf20Sopenharmony_ci * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. 13538c2ecf20Sopenharmony_ci * @device: DRBD device. 13548c2ecf20Sopenharmony_ci * @epoch: Epoch object. 13558c2ecf20Sopenharmony_ci * @ev: Epoch event. 
13568c2ecf20Sopenharmony_ci */ 13578c2ecf20Sopenharmony_cistatic enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection, 13588c2ecf20Sopenharmony_ci struct drbd_epoch *epoch, 13598c2ecf20Sopenharmony_ci enum epoch_event ev) 13608c2ecf20Sopenharmony_ci{ 13618c2ecf20Sopenharmony_ci int epoch_size; 13628c2ecf20Sopenharmony_ci struct drbd_epoch *next_epoch; 13638c2ecf20Sopenharmony_ci enum finish_epoch rv = FE_STILL_LIVE; 13648c2ecf20Sopenharmony_ci 13658c2ecf20Sopenharmony_ci spin_lock(&connection->epoch_lock); 13668c2ecf20Sopenharmony_ci do { 13678c2ecf20Sopenharmony_ci next_epoch = NULL; 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci epoch_size = atomic_read(&epoch->epoch_size); 13708c2ecf20Sopenharmony_ci 13718c2ecf20Sopenharmony_ci switch (ev & ~EV_CLEANUP) { 13728c2ecf20Sopenharmony_ci case EV_PUT: 13738c2ecf20Sopenharmony_ci atomic_dec(&epoch->active); 13748c2ecf20Sopenharmony_ci break; 13758c2ecf20Sopenharmony_ci case EV_GOT_BARRIER_NR: 13768c2ecf20Sopenharmony_ci set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); 13778c2ecf20Sopenharmony_ci break; 13788c2ecf20Sopenharmony_ci case EV_BECAME_LAST: 13798c2ecf20Sopenharmony_ci /* nothing to do*/ 13808c2ecf20Sopenharmony_ci break; 13818c2ecf20Sopenharmony_ci } 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci if (epoch_size != 0 && 13848c2ecf20Sopenharmony_ci atomic_read(&epoch->active) == 0 && 13858c2ecf20Sopenharmony_ci (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { 13868c2ecf20Sopenharmony_ci if (!(ev & EV_CLEANUP)) { 13878c2ecf20Sopenharmony_ci spin_unlock(&connection->epoch_lock); 13888c2ecf20Sopenharmony_ci drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size); 13898c2ecf20Sopenharmony_ci spin_lock(&connection->epoch_lock); 13908c2ecf20Sopenharmony_ci } 13918c2ecf20Sopenharmony_ci#if 0 13928c2ecf20Sopenharmony_ci /* FIXME: dec unacked on connection, once we have 13938c2ecf20Sopenharmony_ci * something to count pending connection packets in. 
*/ 13948c2ecf20Sopenharmony_ci if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) 13958c2ecf20Sopenharmony_ci dec_unacked(epoch->connection); 13968c2ecf20Sopenharmony_ci#endif 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_ci if (connection->current_epoch != epoch) { 13998c2ecf20Sopenharmony_ci next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); 14008c2ecf20Sopenharmony_ci list_del(&epoch->list); 14018c2ecf20Sopenharmony_ci ev = EV_BECAME_LAST | (ev & EV_CLEANUP); 14028c2ecf20Sopenharmony_ci connection->epochs--; 14038c2ecf20Sopenharmony_ci kfree(epoch); 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci if (rv == FE_STILL_LIVE) 14068c2ecf20Sopenharmony_ci rv = FE_DESTROYED; 14078c2ecf20Sopenharmony_ci } else { 14088c2ecf20Sopenharmony_ci epoch->flags = 0; 14098c2ecf20Sopenharmony_ci atomic_set(&epoch->epoch_size, 0); 14108c2ecf20Sopenharmony_ci /* atomic_set(&epoch->active, 0); is already zero */ 14118c2ecf20Sopenharmony_ci if (rv == FE_STILL_LIVE) 14128c2ecf20Sopenharmony_ci rv = FE_RECYCLED; 14138c2ecf20Sopenharmony_ci } 14148c2ecf20Sopenharmony_ci } 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci if (!next_epoch) 14178c2ecf20Sopenharmony_ci break; 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci epoch = next_epoch; 14208c2ecf20Sopenharmony_ci } while (1); 14218c2ecf20Sopenharmony_ci 14228c2ecf20Sopenharmony_ci spin_unlock(&connection->epoch_lock); 14238c2ecf20Sopenharmony_ci 14248c2ecf20Sopenharmony_ci return rv; 14258c2ecf20Sopenharmony_ci} 14268c2ecf20Sopenharmony_ci 14278c2ecf20Sopenharmony_cistatic enum write_ordering_e 14288c2ecf20Sopenharmony_cimax_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) 14298c2ecf20Sopenharmony_ci{ 14308c2ecf20Sopenharmony_ci struct disk_conf *dc; 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci dc = rcu_dereference(bdev->disk_conf); 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci if (wo == WO_BDEV_FLUSH && !dc->disk_flushes) 
14358c2ecf20Sopenharmony_ci wo = WO_DRAIN_IO; 14368c2ecf20Sopenharmony_ci if (wo == WO_DRAIN_IO && !dc->disk_drain) 14378c2ecf20Sopenharmony_ci wo = WO_NONE; 14388c2ecf20Sopenharmony_ci 14398c2ecf20Sopenharmony_ci return wo; 14408c2ecf20Sopenharmony_ci} 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci/** 14438c2ecf20Sopenharmony_ci * drbd_bump_write_ordering() - Fall back to an other write ordering method 14448c2ecf20Sopenharmony_ci * @connection: DRBD connection. 14458c2ecf20Sopenharmony_ci * @wo: Write ordering method to try. 14468c2ecf20Sopenharmony_ci */ 14478c2ecf20Sopenharmony_civoid drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, 14488c2ecf20Sopenharmony_ci enum write_ordering_e wo) 14498c2ecf20Sopenharmony_ci{ 14508c2ecf20Sopenharmony_ci struct drbd_device *device; 14518c2ecf20Sopenharmony_ci enum write_ordering_e pwo; 14528c2ecf20Sopenharmony_ci int vnr; 14538c2ecf20Sopenharmony_ci static char *write_ordering_str[] = { 14548c2ecf20Sopenharmony_ci [WO_NONE] = "none", 14558c2ecf20Sopenharmony_ci [WO_DRAIN_IO] = "drain", 14568c2ecf20Sopenharmony_ci [WO_BDEV_FLUSH] = "flush", 14578c2ecf20Sopenharmony_ci }; 14588c2ecf20Sopenharmony_ci 14598c2ecf20Sopenharmony_ci pwo = resource->write_ordering; 14608c2ecf20Sopenharmony_ci if (wo != WO_BDEV_FLUSH) 14618c2ecf20Sopenharmony_ci wo = min(pwo, wo); 14628c2ecf20Sopenharmony_ci rcu_read_lock(); 14638c2ecf20Sopenharmony_ci idr_for_each_entry(&resource->devices, device, vnr) { 14648c2ecf20Sopenharmony_ci if (get_ldev(device)) { 14658c2ecf20Sopenharmony_ci wo = max_allowed_wo(device->ldev, wo); 14668c2ecf20Sopenharmony_ci if (device->ldev == bdev) 14678c2ecf20Sopenharmony_ci bdev = NULL; 14688c2ecf20Sopenharmony_ci put_ldev(device); 14698c2ecf20Sopenharmony_ci } 14708c2ecf20Sopenharmony_ci } 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci if (bdev) 14738c2ecf20Sopenharmony_ci wo = max_allowed_wo(bdev, wo); 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_ci 
rcu_read_unlock(); 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_ci resource->write_ordering = wo; 14788c2ecf20Sopenharmony_ci if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH) 14798c2ecf20Sopenharmony_ci drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); 14808c2ecf20Sopenharmony_ci} 14818c2ecf20Sopenharmony_ci 14828c2ecf20Sopenharmony_ci/* 14838c2ecf20Sopenharmony_ci * Mapping "discard" to ZEROOUT with UNMAP does not work for us: 14848c2ecf20Sopenharmony_ci * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it 14858c2ecf20Sopenharmony_ci * will directly go to fallback mode, submitting normal writes, and 14868c2ecf20Sopenharmony_ci * never even try to UNMAP. 14878c2ecf20Sopenharmony_ci * 14888c2ecf20Sopenharmony_ci * And dm-thin does not do this (yet), mostly because in general it has 14898c2ecf20Sopenharmony_ci * to assume that "skip_block_zeroing" is set. See also: 14908c2ecf20Sopenharmony_ci * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html 14918c2ecf20Sopenharmony_ci * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html 14928c2ecf20Sopenharmony_ci * 14938c2ecf20Sopenharmony_ci * We *may* ignore the discard-zeroes-data setting, if so configured. 14948c2ecf20Sopenharmony_ci * 14958c2ecf20Sopenharmony_ci * Assumption is that this "discard_zeroes_data=0" is only because the backend 14968c2ecf20Sopenharmony_ci * may ignore partial unaligned discards. 14978c2ecf20Sopenharmony_ci * 14988c2ecf20Sopenharmony_ci * LVM/DM thin as of at least 14998c2ecf20Sopenharmony_ci * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) 15008c2ecf20Sopenharmony_ci * Library version: 1.02.93-RHEL7 (2015-01-28) 15018c2ecf20Sopenharmony_ci * Driver version: 4.29.0 15028c2ecf20Sopenharmony_ci * still behaves this way. 15038c2ecf20Sopenharmony_ci * 15048c2ecf20Sopenharmony_ci * For unaligned (wrt. 
alignment and granularity) or too small discards, 15058c2ecf20Sopenharmony_ci * we zero-out the initial (and/or) trailing unaligned partial chunks, 15068c2ecf20Sopenharmony_ci * but discard all the aligned full chunks. 15078c2ecf20Sopenharmony_ci * 15088c2ecf20Sopenharmony_ci * At least for LVM/DM thin, with skip_block_zeroing=false, 15098c2ecf20Sopenharmony_ci * the result is effectively "discard_zeroes_data=1". 15108c2ecf20Sopenharmony_ci */ 15118c2ecf20Sopenharmony_ci/* flags: EE_TRIM|EE_ZEROOUT */ 15128c2ecf20Sopenharmony_ciint drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags) 15138c2ecf20Sopenharmony_ci{ 15148c2ecf20Sopenharmony_ci struct block_device *bdev = device->ldev->backing_bdev; 15158c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(bdev); 15168c2ecf20Sopenharmony_ci sector_t tmp, nr; 15178c2ecf20Sopenharmony_ci unsigned int max_discard_sectors, granularity; 15188c2ecf20Sopenharmony_ci int alignment; 15198c2ecf20Sopenharmony_ci int err = 0; 15208c2ecf20Sopenharmony_ci 15218c2ecf20Sopenharmony_ci if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM)) 15228c2ecf20Sopenharmony_ci goto zero_out; 15238c2ecf20Sopenharmony_ci 15248c2ecf20Sopenharmony_ci /* Zero-sector (unknown) and one-sector granularities are the same. 
*/ 15258c2ecf20Sopenharmony_ci granularity = max(q->limits.discard_granularity >> 9, 1U); 15268c2ecf20Sopenharmony_ci alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); 15298c2ecf20Sopenharmony_ci max_discard_sectors -= max_discard_sectors % granularity; 15308c2ecf20Sopenharmony_ci if (unlikely(!max_discard_sectors)) 15318c2ecf20Sopenharmony_ci goto zero_out; 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_ci if (nr_sectors < granularity) 15348c2ecf20Sopenharmony_ci goto zero_out; 15358c2ecf20Sopenharmony_ci 15368c2ecf20Sopenharmony_ci tmp = start; 15378c2ecf20Sopenharmony_ci if (sector_div(tmp, granularity) != alignment) { 15388c2ecf20Sopenharmony_ci if (nr_sectors < 2*granularity) 15398c2ecf20Sopenharmony_ci goto zero_out; 15408c2ecf20Sopenharmony_ci /* start + gran - (start + gran - align) % gran */ 15418c2ecf20Sopenharmony_ci tmp = start + granularity - alignment; 15428c2ecf20Sopenharmony_ci tmp = start + granularity - sector_div(tmp, granularity); 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci nr = tmp - start; 15458c2ecf20Sopenharmony_ci /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many 15468c2ecf20Sopenharmony_ci * layers are below us, some may have smaller granularity */ 15478c2ecf20Sopenharmony_ci err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); 15488c2ecf20Sopenharmony_ci nr_sectors -= nr; 15498c2ecf20Sopenharmony_ci start = tmp; 15508c2ecf20Sopenharmony_ci } 15518c2ecf20Sopenharmony_ci while (nr_sectors >= max_discard_sectors) { 15528c2ecf20Sopenharmony_ci err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0); 15538c2ecf20Sopenharmony_ci nr_sectors -= max_discard_sectors; 15548c2ecf20Sopenharmony_ci start += max_discard_sectors; 15558c2ecf20Sopenharmony_ci } 15568c2ecf20Sopenharmony_ci if (nr_sectors) { 15578c2ecf20Sopenharmony_ci /* max_discard_sectors is unsigned 
int (and a multiple of 15588c2ecf20Sopenharmony_ci * granularity, we made sure of that above already); 15598c2ecf20Sopenharmony_ci * nr is < max_discard_sectors; 15608c2ecf20Sopenharmony_ci * I don't need sector_div here, even though nr is sector_t */ 15618c2ecf20Sopenharmony_ci nr = nr_sectors; 15628c2ecf20Sopenharmony_ci nr -= (unsigned int)nr % granularity; 15638c2ecf20Sopenharmony_ci if (nr) { 15648c2ecf20Sopenharmony_ci err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); 15658c2ecf20Sopenharmony_ci nr_sectors -= nr; 15668c2ecf20Sopenharmony_ci start += nr; 15678c2ecf20Sopenharmony_ci } 15688c2ecf20Sopenharmony_ci } 15698c2ecf20Sopenharmony_ci zero_out: 15708c2ecf20Sopenharmony_ci if (nr_sectors) { 15718c2ecf20Sopenharmony_ci err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 15728c2ecf20Sopenharmony_ci (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP); 15738c2ecf20Sopenharmony_ci } 15748c2ecf20Sopenharmony_ci return err != 0; 15758c2ecf20Sopenharmony_ci} 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_cistatic bool can_do_reliable_discards(struct drbd_device *device) 15788c2ecf20Sopenharmony_ci{ 15798c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); 15808c2ecf20Sopenharmony_ci struct disk_conf *dc; 15818c2ecf20Sopenharmony_ci bool can_do; 15828c2ecf20Sopenharmony_ci 15838c2ecf20Sopenharmony_ci if (!blk_queue_discard(q)) 15848c2ecf20Sopenharmony_ci return false; 15858c2ecf20Sopenharmony_ci 15868c2ecf20Sopenharmony_ci rcu_read_lock(); 15878c2ecf20Sopenharmony_ci dc = rcu_dereference(device->ldev->disk_conf); 15888c2ecf20Sopenharmony_ci can_do = dc->discard_zeroes_if_aligned; 15898c2ecf20Sopenharmony_ci rcu_read_unlock(); 15908c2ecf20Sopenharmony_ci return can_do; 15918c2ecf20Sopenharmony_ci} 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_cistatic void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req) 15948c2ecf20Sopenharmony_ci{ 
15958c2ecf20Sopenharmony_ci /* If the backend cannot discard, or does not guarantee 15968c2ecf20Sopenharmony_ci * read-back zeroes in discarded ranges, we fall back to 15978c2ecf20Sopenharmony_ci * zero-out. Unless configuration specifically requested 15988c2ecf20Sopenharmony_ci * otherwise. */ 15998c2ecf20Sopenharmony_ci if (!can_do_reliable_discards(device)) 16008c2ecf20Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_ci if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, 16038c2ecf20Sopenharmony_ci peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM))) 16048c2ecf20Sopenharmony_ci peer_req->flags |= EE_WAS_ERROR; 16058c2ecf20Sopenharmony_ci drbd_endio_write_sec_final(peer_req); 16068c2ecf20Sopenharmony_ci} 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_cistatic void drbd_issue_peer_wsame(struct drbd_device *device, 16098c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req) 16108c2ecf20Sopenharmony_ci{ 16118c2ecf20Sopenharmony_ci struct block_device *bdev = device->ldev->backing_bdev; 16128c2ecf20Sopenharmony_ci sector_t s = peer_req->i.sector; 16138c2ecf20Sopenharmony_ci sector_t nr = peer_req->i.size >> 9; 16148c2ecf20Sopenharmony_ci if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages)) 16158c2ecf20Sopenharmony_ci peer_req->flags |= EE_WAS_ERROR; 16168c2ecf20Sopenharmony_ci drbd_endio_write_sec_final(peer_req); 16178c2ecf20Sopenharmony_ci} 16188c2ecf20Sopenharmony_ci 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci/** 16218c2ecf20Sopenharmony_ci * drbd_submit_peer_request() 16228c2ecf20Sopenharmony_ci * @device: DRBD device. 16238c2ecf20Sopenharmony_ci * @peer_req: peer request 16248c2ecf20Sopenharmony_ci * @rw: flag field, see bio->bi_opf 16258c2ecf20Sopenharmony_ci * 16268c2ecf20Sopenharmony_ci * May spread the pages to multiple bios, 16278c2ecf20Sopenharmony_ci * depending on bio_add_page restrictions. 
16288c2ecf20Sopenharmony_ci * 16298c2ecf20Sopenharmony_ci * Returns 0 if all bios have been submitted, 16308c2ecf20Sopenharmony_ci * -ENOMEM if we could not allocate enough bios, 16318c2ecf20Sopenharmony_ci * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a 16328c2ecf20Sopenharmony_ci * single page to an empty bio (which should never happen and likely indicates 16338c2ecf20Sopenharmony_ci * that the lower level IO stack is in some way broken). This has been observed 16348c2ecf20Sopenharmony_ci * on certain Xen deployments. 16358c2ecf20Sopenharmony_ci */ 16368c2ecf20Sopenharmony_ci/* TODO allocate from our own bio_set. */ 16378c2ecf20Sopenharmony_ciint drbd_submit_peer_request(struct drbd_device *device, 16388c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req, 16398c2ecf20Sopenharmony_ci const unsigned op, const unsigned op_flags, 16408c2ecf20Sopenharmony_ci const int fault_type) 16418c2ecf20Sopenharmony_ci{ 16428c2ecf20Sopenharmony_ci struct bio *bios = NULL; 16438c2ecf20Sopenharmony_ci struct bio *bio; 16448c2ecf20Sopenharmony_ci struct page *page = peer_req->pages; 16458c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 16468c2ecf20Sopenharmony_ci unsigned data_size = peer_req->i.size; 16478c2ecf20Sopenharmony_ci unsigned n_bios = 0; 16488c2ecf20Sopenharmony_ci unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 16498c2ecf20Sopenharmony_ci int err = -ENOMEM; 16508c2ecf20Sopenharmony_ci 16518c2ecf20Sopenharmony_ci /* TRIM/DISCARD: for now, always use the helper function 16528c2ecf20Sopenharmony_ci * blkdev_issue_zeroout(..., discard=true). 16538c2ecf20Sopenharmony_ci * It's synchronous, but it does the right thing wrt. bio splitting. 16548c2ecf20Sopenharmony_ci * Correctness first, performance later. Next step is to code an 16558c2ecf20Sopenharmony_ci * asynchronous variant of the same. 
16568c2ecf20Sopenharmony_ci */ 16578c2ecf20Sopenharmony_ci if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) { 16588c2ecf20Sopenharmony_ci /* wait for all pending IO completions, before we start 16598c2ecf20Sopenharmony_ci * zeroing things out. */ 16608c2ecf20Sopenharmony_ci conn_wait_active_ee_empty(peer_req->peer_device->connection); 16618c2ecf20Sopenharmony_ci /* add it to the active list now, 16628c2ecf20Sopenharmony_ci * so we can find it to present it in debugfs */ 16638c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 16648c2ecf20Sopenharmony_ci peer_req->flags |= EE_SUBMITTED; 16658c2ecf20Sopenharmony_ci 16668c2ecf20Sopenharmony_ci /* If this was a resync request from receive_rs_deallocated(), 16678c2ecf20Sopenharmony_ci * it is already on the sync_ee list */ 16688c2ecf20Sopenharmony_ci if (list_empty(&peer_req->w.list)) { 16698c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 16708c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->active_ee); 16718c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 16728c2ecf20Sopenharmony_ci } 16738c2ecf20Sopenharmony_ci 16748c2ecf20Sopenharmony_ci if (peer_req->flags & (EE_TRIM|EE_ZEROOUT)) 16758c2ecf20Sopenharmony_ci drbd_issue_peer_discard_or_zero_out(device, peer_req); 16768c2ecf20Sopenharmony_ci else /* EE_WRITE_SAME */ 16778c2ecf20Sopenharmony_ci drbd_issue_peer_wsame(device, peer_req); 16788c2ecf20Sopenharmony_ci return 0; 16798c2ecf20Sopenharmony_ci } 16808c2ecf20Sopenharmony_ci 16818c2ecf20Sopenharmony_ci /* In most cases, we will only need one bio. But in case the lower 16828c2ecf20Sopenharmony_ci * level restrictions happen to be different at this offset on this 16838c2ecf20Sopenharmony_ci * side than those of the sending peer, we may need to submit the 16848c2ecf20Sopenharmony_ci * request in more than one bio. 
16858c2ecf20Sopenharmony_ci * 16868c2ecf20Sopenharmony_ci * Plain bio_alloc is good enough here, this is no DRBD internally 16878c2ecf20Sopenharmony_ci * generated bio, but a bio allocated on behalf of the peer. 16888c2ecf20Sopenharmony_ci */ 16898c2ecf20Sopenharmony_cinext_bio: 16908c2ecf20Sopenharmony_ci bio = bio_alloc(GFP_NOIO, nr_pages); 16918c2ecf20Sopenharmony_ci if (!bio) { 16928c2ecf20Sopenharmony_ci drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages); 16938c2ecf20Sopenharmony_ci goto fail; 16948c2ecf20Sopenharmony_ci } 16958c2ecf20Sopenharmony_ci /* > peer_req->i.sector, unless this is the first bio */ 16968c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = sector; 16978c2ecf20Sopenharmony_ci bio_set_dev(bio, device->ldev->backing_bdev); 16988c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, op, op_flags); 16998c2ecf20Sopenharmony_ci bio->bi_private = peer_req; 17008c2ecf20Sopenharmony_ci bio->bi_end_io = drbd_peer_request_endio; 17018c2ecf20Sopenharmony_ci 17028c2ecf20Sopenharmony_ci bio->bi_next = bios; 17038c2ecf20Sopenharmony_ci bios = bio; 17048c2ecf20Sopenharmony_ci ++n_bios; 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_ci page_chain_for_each(page) { 17078c2ecf20Sopenharmony_ci unsigned len = min_t(unsigned, data_size, PAGE_SIZE); 17088c2ecf20Sopenharmony_ci if (!bio_add_page(bio, page, len, 0)) 17098c2ecf20Sopenharmony_ci goto next_bio; 17108c2ecf20Sopenharmony_ci data_size -= len; 17118c2ecf20Sopenharmony_ci sector += len >> 9; 17128c2ecf20Sopenharmony_ci --nr_pages; 17138c2ecf20Sopenharmony_ci } 17148c2ecf20Sopenharmony_ci D_ASSERT(device, data_size == 0); 17158c2ecf20Sopenharmony_ci D_ASSERT(device, page == NULL); 17168c2ecf20Sopenharmony_ci 17178c2ecf20Sopenharmony_ci atomic_set(&peer_req->pending_bios, n_bios); 17188c2ecf20Sopenharmony_ci /* for debugfs: update timestamp, mark as submitted */ 17198c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 17208c2ecf20Sopenharmony_ci peer_req->flags |= EE_SUBMITTED; 
17218c2ecf20Sopenharmony_ci do { 17228c2ecf20Sopenharmony_ci bio = bios; 17238c2ecf20Sopenharmony_ci bios = bios->bi_next; 17248c2ecf20Sopenharmony_ci bio->bi_next = NULL; 17258c2ecf20Sopenharmony_ci 17268c2ecf20Sopenharmony_ci drbd_submit_bio_noacct(device, fault_type, bio); 17278c2ecf20Sopenharmony_ci } while (bios); 17288c2ecf20Sopenharmony_ci return 0; 17298c2ecf20Sopenharmony_ci 17308c2ecf20Sopenharmony_cifail: 17318c2ecf20Sopenharmony_ci while (bios) { 17328c2ecf20Sopenharmony_ci bio = bios; 17338c2ecf20Sopenharmony_ci bios = bios->bi_next; 17348c2ecf20Sopenharmony_ci bio_put(bio); 17358c2ecf20Sopenharmony_ci } 17368c2ecf20Sopenharmony_ci return err; 17378c2ecf20Sopenharmony_ci} 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_cistatic void drbd_remove_epoch_entry_interval(struct drbd_device *device, 17408c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req) 17418c2ecf20Sopenharmony_ci{ 17428c2ecf20Sopenharmony_ci struct drbd_interval *i = &peer_req->i; 17438c2ecf20Sopenharmony_ci 17448c2ecf20Sopenharmony_ci drbd_remove_interval(&device->write_requests, i); 17458c2ecf20Sopenharmony_ci drbd_clear_interval(i); 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_ci /* Wake up any processes waiting for this peer request to complete. 
*/ 17488c2ecf20Sopenharmony_ci if (i->waiting) 17498c2ecf20Sopenharmony_ci wake_up(&device->misc_wait); 17508c2ecf20Sopenharmony_ci} 17518c2ecf20Sopenharmony_ci 17528c2ecf20Sopenharmony_cistatic void conn_wait_active_ee_empty(struct drbd_connection *connection) 17538c2ecf20Sopenharmony_ci{ 17548c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 17558c2ecf20Sopenharmony_ci int vnr; 17568c2ecf20Sopenharmony_ci 17578c2ecf20Sopenharmony_ci rcu_read_lock(); 17588c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 17598c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 17608c2ecf20Sopenharmony_ci 17618c2ecf20Sopenharmony_ci kref_get(&device->kref); 17628c2ecf20Sopenharmony_ci rcu_read_unlock(); 17638c2ecf20Sopenharmony_ci drbd_wait_ee_list_empty(device, &device->active_ee); 17648c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 17658c2ecf20Sopenharmony_ci rcu_read_lock(); 17668c2ecf20Sopenharmony_ci } 17678c2ecf20Sopenharmony_ci rcu_read_unlock(); 17688c2ecf20Sopenharmony_ci} 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_cistatic int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) 17718c2ecf20Sopenharmony_ci{ 17728c2ecf20Sopenharmony_ci int rv; 17738c2ecf20Sopenharmony_ci struct p_barrier *p = pi->data; 17748c2ecf20Sopenharmony_ci struct drbd_epoch *epoch; 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_ci /* FIXME these are unacked on connection, 17778c2ecf20Sopenharmony_ci * not a specific (peer)device. 
17788c2ecf20Sopenharmony_ci */ 17798c2ecf20Sopenharmony_ci connection->current_epoch->barrier_nr = p->barrier; 17808c2ecf20Sopenharmony_ci connection->current_epoch->connection = connection; 17818c2ecf20Sopenharmony_ci rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR); 17828c2ecf20Sopenharmony_ci 17838c2ecf20Sopenharmony_ci /* P_BARRIER_ACK may imply that the corresponding extent is dropped from 17848c2ecf20Sopenharmony_ci * the activity log, which means it would not be resynced in case the 17858c2ecf20Sopenharmony_ci * R_PRIMARY crashes now. 17868c2ecf20Sopenharmony_ci * Therefore we must send the barrier_ack after the barrier request was 17878c2ecf20Sopenharmony_ci * completed. */ 17888c2ecf20Sopenharmony_ci switch (connection->resource->write_ordering) { 17898c2ecf20Sopenharmony_ci case WO_NONE: 17908c2ecf20Sopenharmony_ci if (rv == FE_RECYCLED) 17918c2ecf20Sopenharmony_ci return 0; 17928c2ecf20Sopenharmony_ci 17938c2ecf20Sopenharmony_ci /* receiver context, in the writeout path of the other node. 
17948c2ecf20Sopenharmony_ci * avoid potential distributed deadlock */ 17958c2ecf20Sopenharmony_ci epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); 17968c2ecf20Sopenharmony_ci if (epoch) 17978c2ecf20Sopenharmony_ci break; 17988c2ecf20Sopenharmony_ci else 17998c2ecf20Sopenharmony_ci drbd_warn(connection, "Allocation of an epoch failed, slowing down\n"); 18008c2ecf20Sopenharmony_ci fallthrough; 18018c2ecf20Sopenharmony_ci 18028c2ecf20Sopenharmony_ci case WO_BDEV_FLUSH: 18038c2ecf20Sopenharmony_ci case WO_DRAIN_IO: 18048c2ecf20Sopenharmony_ci conn_wait_active_ee_empty(connection); 18058c2ecf20Sopenharmony_ci drbd_flush(connection); 18068c2ecf20Sopenharmony_ci 18078c2ecf20Sopenharmony_ci if (atomic_read(&connection->current_epoch->epoch_size)) { 18088c2ecf20Sopenharmony_ci epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); 18098c2ecf20Sopenharmony_ci if (epoch) 18108c2ecf20Sopenharmony_ci break; 18118c2ecf20Sopenharmony_ci } 18128c2ecf20Sopenharmony_ci 18138c2ecf20Sopenharmony_ci return 0; 18148c2ecf20Sopenharmony_ci default: 18158c2ecf20Sopenharmony_ci drbd_err(connection, "Strangeness in connection->write_ordering %d\n", 18168c2ecf20Sopenharmony_ci connection->resource->write_ordering); 18178c2ecf20Sopenharmony_ci return -EIO; 18188c2ecf20Sopenharmony_ci } 18198c2ecf20Sopenharmony_ci 18208c2ecf20Sopenharmony_ci epoch->flags = 0; 18218c2ecf20Sopenharmony_ci atomic_set(&epoch->epoch_size, 0); 18228c2ecf20Sopenharmony_ci atomic_set(&epoch->active, 0); 18238c2ecf20Sopenharmony_ci 18248c2ecf20Sopenharmony_ci spin_lock(&connection->epoch_lock); 18258c2ecf20Sopenharmony_ci if (atomic_read(&connection->current_epoch->epoch_size)) { 18268c2ecf20Sopenharmony_ci list_add(&epoch->list, &connection->current_epoch->list); 18278c2ecf20Sopenharmony_ci connection->current_epoch = epoch; 18288c2ecf20Sopenharmony_ci connection->epochs++; 18298c2ecf20Sopenharmony_ci } else { 18308c2ecf20Sopenharmony_ci /* The current_epoch got recycled while we allocated this one... 
*/ 18318c2ecf20Sopenharmony_ci kfree(epoch); 18328c2ecf20Sopenharmony_ci } 18338c2ecf20Sopenharmony_ci spin_unlock(&connection->epoch_lock); 18348c2ecf20Sopenharmony_ci 18358c2ecf20Sopenharmony_ci return 0; 18368c2ecf20Sopenharmony_ci} 18378c2ecf20Sopenharmony_ci 18388c2ecf20Sopenharmony_ci/* quick wrapper in case payload size != request_size (write same) */ 18398c2ecf20Sopenharmony_cistatic void drbd_csum_ee_size(struct crypto_shash *h, 18408c2ecf20Sopenharmony_ci struct drbd_peer_request *r, void *d, 18418c2ecf20Sopenharmony_ci unsigned int payload_size) 18428c2ecf20Sopenharmony_ci{ 18438c2ecf20Sopenharmony_ci unsigned int tmp = r->i.size; 18448c2ecf20Sopenharmony_ci r->i.size = payload_size; 18458c2ecf20Sopenharmony_ci drbd_csum_ee(h, r, d); 18468c2ecf20Sopenharmony_ci r->i.size = tmp; 18478c2ecf20Sopenharmony_ci} 18488c2ecf20Sopenharmony_ci 18498c2ecf20Sopenharmony_ci/* used from receive_RSDataReply (recv_resync_read) 18508c2ecf20Sopenharmony_ci * and from receive_Data. 18518c2ecf20Sopenharmony_ci * data_size: actual payload ("data in") 18528c2ecf20Sopenharmony_ci * for normal writes that is bi_size. 18538c2ecf20Sopenharmony_ci * for discards, that is zero. 18548c2ecf20Sopenharmony_ci * for write same, it is logical_block_size. 18558c2ecf20Sopenharmony_ci * both trim and write same have the bi_size ("data len to be affected") 18568c2ecf20Sopenharmony_ci * as extra argument in the packet header. 
18578c2ecf20Sopenharmony_ci */ 18588c2ecf20Sopenharmony_cistatic struct drbd_peer_request * 18598c2ecf20Sopenharmony_ciread_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 18608c2ecf20Sopenharmony_ci struct packet_info *pi) __must_hold(local) 18618c2ecf20Sopenharmony_ci{ 18628c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 18638c2ecf20Sopenharmony_ci const sector_t capacity = get_capacity(device->vdisk); 18648c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 18658c2ecf20Sopenharmony_ci struct page *page; 18668c2ecf20Sopenharmony_ci int digest_size, err; 18678c2ecf20Sopenharmony_ci unsigned int data_size = pi->size, ds; 18688c2ecf20Sopenharmony_ci void *dig_in = peer_device->connection->int_dig_in; 18698c2ecf20Sopenharmony_ci void *dig_vv = peer_device->connection->int_dig_vv; 18708c2ecf20Sopenharmony_ci unsigned long *data; 18718c2ecf20Sopenharmony_ci struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; 18728c2ecf20Sopenharmony_ci struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL; 18738c2ecf20Sopenharmony_ci struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL; 18748c2ecf20Sopenharmony_ci 18758c2ecf20Sopenharmony_ci digest_size = 0; 18768c2ecf20Sopenharmony_ci if (!trim && peer_device->connection->peer_integrity_tfm) { 18778c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); 18788c2ecf20Sopenharmony_ci /* 18798c2ecf20Sopenharmony_ci * FIXME: Receive the incoming digest into the receive buffer 18808c2ecf20Sopenharmony_ci * here, together with its struct p_data? 
18818c2ecf20Sopenharmony_ci */ 18828c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); 18838c2ecf20Sopenharmony_ci if (err) 18848c2ecf20Sopenharmony_ci return NULL; 18858c2ecf20Sopenharmony_ci data_size -= digest_size; 18868c2ecf20Sopenharmony_ci } 18878c2ecf20Sopenharmony_ci 18888c2ecf20Sopenharmony_ci /* assume request_size == data_size, but special case trim and wsame. */ 18898c2ecf20Sopenharmony_ci ds = data_size; 18908c2ecf20Sopenharmony_ci if (trim) { 18918c2ecf20Sopenharmony_ci if (!expect(data_size == 0)) 18928c2ecf20Sopenharmony_ci return NULL; 18938c2ecf20Sopenharmony_ci ds = be32_to_cpu(trim->size); 18948c2ecf20Sopenharmony_ci } else if (zeroes) { 18958c2ecf20Sopenharmony_ci if (!expect(data_size == 0)) 18968c2ecf20Sopenharmony_ci return NULL; 18978c2ecf20Sopenharmony_ci ds = be32_to_cpu(zeroes->size); 18988c2ecf20Sopenharmony_ci } else if (wsame) { 18998c2ecf20Sopenharmony_ci if (data_size != queue_logical_block_size(device->rq_queue)) { 19008c2ecf20Sopenharmony_ci drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n", 19018c2ecf20Sopenharmony_ci data_size, queue_logical_block_size(device->rq_queue)); 19028c2ecf20Sopenharmony_ci return NULL; 19038c2ecf20Sopenharmony_ci } 19048c2ecf20Sopenharmony_ci if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) { 19058c2ecf20Sopenharmony_ci drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n", 19068c2ecf20Sopenharmony_ci data_size, bdev_logical_block_size(device->ldev->backing_bdev)); 19078c2ecf20Sopenharmony_ci return NULL; 19088c2ecf20Sopenharmony_ci } 19098c2ecf20Sopenharmony_ci ds = be32_to_cpu(wsame->size); 19108c2ecf20Sopenharmony_ci } 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_ci if (!expect(IS_ALIGNED(ds, 512))) 19138c2ecf20Sopenharmony_ci return NULL; 19148c2ecf20Sopenharmony_ci if (trim || wsame || zeroes) { 19158c2ecf20Sopenharmony_ci if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) 
19168c2ecf20Sopenharmony_ci return NULL; 19178c2ecf20Sopenharmony_ci } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) 19188c2ecf20Sopenharmony_ci return NULL; 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ci /* even though we trust out peer, 19218c2ecf20Sopenharmony_ci * we sometimes have to double check. */ 19228c2ecf20Sopenharmony_ci if (sector + (ds>>9) > capacity) { 19238c2ecf20Sopenharmony_ci drbd_err(device, "request from peer beyond end of local disk: " 19248c2ecf20Sopenharmony_ci "capacity: %llus < sector: %llus + size: %u\n", 19258c2ecf20Sopenharmony_ci (unsigned long long)capacity, 19268c2ecf20Sopenharmony_ci (unsigned long long)sector, ds); 19278c2ecf20Sopenharmony_ci return NULL; 19288c2ecf20Sopenharmony_ci } 19298c2ecf20Sopenharmony_ci 19308c2ecf20Sopenharmony_ci /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 19318c2ecf20Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 19328c2ecf20Sopenharmony_ci * which in turn might block on the other node at this very place. 
*/ 19338c2ecf20Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO); 19348c2ecf20Sopenharmony_ci if (!peer_req) 19358c2ecf20Sopenharmony_ci return NULL; 19368c2ecf20Sopenharmony_ci 19378c2ecf20Sopenharmony_ci peer_req->flags |= EE_WRITE; 19388c2ecf20Sopenharmony_ci if (trim) { 19398c2ecf20Sopenharmony_ci peer_req->flags |= EE_TRIM; 19408c2ecf20Sopenharmony_ci return peer_req; 19418c2ecf20Sopenharmony_ci } 19428c2ecf20Sopenharmony_ci if (zeroes) { 19438c2ecf20Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 19448c2ecf20Sopenharmony_ci return peer_req; 19458c2ecf20Sopenharmony_ci } 19468c2ecf20Sopenharmony_ci if (wsame) 19478c2ecf20Sopenharmony_ci peer_req->flags |= EE_WRITE_SAME; 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci /* receive payload size bytes into page chain */ 19508c2ecf20Sopenharmony_ci ds = data_size; 19518c2ecf20Sopenharmony_ci page = peer_req->pages; 19528c2ecf20Sopenharmony_ci page_chain_for_each(page) { 19538c2ecf20Sopenharmony_ci unsigned len = min_t(int, ds, PAGE_SIZE); 19548c2ecf20Sopenharmony_ci data = kmap(page); 19558c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, data, len); 19568c2ecf20Sopenharmony_ci if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) { 19578c2ecf20Sopenharmony_ci drbd_err(device, "Fault injection: Corrupting data on receive\n"); 19588c2ecf20Sopenharmony_ci data[0] = data[0] ^ (unsigned long)-1; 19598c2ecf20Sopenharmony_ci } 19608c2ecf20Sopenharmony_ci kunmap(page); 19618c2ecf20Sopenharmony_ci if (err) { 19628c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 19638c2ecf20Sopenharmony_ci return NULL; 19648c2ecf20Sopenharmony_ci } 19658c2ecf20Sopenharmony_ci ds -= len; 19668c2ecf20Sopenharmony_ci } 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci if (digest_size) { 19698c2ecf20Sopenharmony_ci drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size); 19708c2ecf20Sopenharmony_ci if (memcmp(dig_in, 
dig_vv, digest_size)) { 19718c2ecf20Sopenharmony_ci drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", 19728c2ecf20Sopenharmony_ci (unsigned long long)sector, data_size); 19738c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 19748c2ecf20Sopenharmony_ci return NULL; 19758c2ecf20Sopenharmony_ci } 19768c2ecf20Sopenharmony_ci } 19778c2ecf20Sopenharmony_ci device->recv_cnt += data_size >> 9; 19788c2ecf20Sopenharmony_ci return peer_req; 19798c2ecf20Sopenharmony_ci} 19808c2ecf20Sopenharmony_ci 19818c2ecf20Sopenharmony_ci/* drbd_drain_block() just takes a data block 19828c2ecf20Sopenharmony_ci * out of the socket input buffer, and discards it. 19838c2ecf20Sopenharmony_ci */ 19848c2ecf20Sopenharmony_cistatic int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) 19858c2ecf20Sopenharmony_ci{ 19868c2ecf20Sopenharmony_ci struct page *page; 19878c2ecf20Sopenharmony_ci int err = 0; 19888c2ecf20Sopenharmony_ci void *data; 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci if (!data_size) 19918c2ecf20Sopenharmony_ci return 0; 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_ci page = drbd_alloc_pages(peer_device, 1, 1); 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_ci data = kmap(page); 19968c2ecf20Sopenharmony_ci while (data_size) { 19978c2ecf20Sopenharmony_ci unsigned int len = min_t(int, data_size, PAGE_SIZE); 19988c2ecf20Sopenharmony_ci 19998c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, data, len); 20008c2ecf20Sopenharmony_ci if (err) 20018c2ecf20Sopenharmony_ci break; 20028c2ecf20Sopenharmony_ci data_size -= len; 20038c2ecf20Sopenharmony_ci } 20048c2ecf20Sopenharmony_ci kunmap(page); 20058c2ecf20Sopenharmony_ci drbd_free_pages(peer_device->device, page, 0); 20068c2ecf20Sopenharmony_ci return err; 20078c2ecf20Sopenharmony_ci} 20088c2ecf20Sopenharmony_ci 20098c2ecf20Sopenharmony_cistatic int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req, 
20108c2ecf20Sopenharmony_ci sector_t sector, int data_size) 20118c2ecf20Sopenharmony_ci{ 20128c2ecf20Sopenharmony_ci struct bio_vec bvec; 20138c2ecf20Sopenharmony_ci struct bvec_iter iter; 20148c2ecf20Sopenharmony_ci struct bio *bio; 20158c2ecf20Sopenharmony_ci int digest_size, err, expect; 20168c2ecf20Sopenharmony_ci void *dig_in = peer_device->connection->int_dig_in; 20178c2ecf20Sopenharmony_ci void *dig_vv = peer_device->connection->int_dig_vv; 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ci digest_size = 0; 20208c2ecf20Sopenharmony_ci if (peer_device->connection->peer_integrity_tfm) { 20218c2ecf20Sopenharmony_ci digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); 20228c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); 20238c2ecf20Sopenharmony_ci if (err) 20248c2ecf20Sopenharmony_ci return err; 20258c2ecf20Sopenharmony_ci data_size -= digest_size; 20268c2ecf20Sopenharmony_ci } 20278c2ecf20Sopenharmony_ci 20288c2ecf20Sopenharmony_ci /* optimistically update recv_cnt. if receiving fails below, 20298c2ecf20Sopenharmony_ci * we disconnect anyways, and counters will be reset. 
*/ 20308c2ecf20Sopenharmony_ci peer_device->device->recv_cnt += data_size>>9; 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_ci bio = req->master_bio; 20338c2ecf20Sopenharmony_ci D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector); 20348c2ecf20Sopenharmony_ci 20358c2ecf20Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 20368c2ecf20Sopenharmony_ci void *mapped = kmap(bvec.bv_page) + bvec.bv_offset; 20378c2ecf20Sopenharmony_ci expect = min_t(int, data_size, bvec.bv_len); 20388c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(peer_device->connection, mapped, expect); 20398c2ecf20Sopenharmony_ci kunmap(bvec.bv_page); 20408c2ecf20Sopenharmony_ci if (err) 20418c2ecf20Sopenharmony_ci return err; 20428c2ecf20Sopenharmony_ci data_size -= expect; 20438c2ecf20Sopenharmony_ci } 20448c2ecf20Sopenharmony_ci 20458c2ecf20Sopenharmony_ci if (digest_size) { 20468c2ecf20Sopenharmony_ci drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv); 20478c2ecf20Sopenharmony_ci if (memcmp(dig_in, dig_vv, digest_size)) { 20488c2ecf20Sopenharmony_ci drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n"); 20498c2ecf20Sopenharmony_ci return -EINVAL; 20508c2ecf20Sopenharmony_ci } 20518c2ecf20Sopenharmony_ci } 20528c2ecf20Sopenharmony_ci 20538c2ecf20Sopenharmony_ci D_ASSERT(peer_device->device, data_size == 0); 20548c2ecf20Sopenharmony_ci return 0; 20558c2ecf20Sopenharmony_ci} 20568c2ecf20Sopenharmony_ci 20578c2ecf20Sopenharmony_ci/* 20588c2ecf20Sopenharmony_ci * e_end_resync_block() is called in ack_sender context via 20598c2ecf20Sopenharmony_ci * drbd_finish_peer_reqs(). 
20608c2ecf20Sopenharmony_ci */ 20618c2ecf20Sopenharmony_cistatic int e_end_resync_block(struct drbd_work *w, int unused) 20628c2ecf20Sopenharmony_ci{ 20638c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = 20648c2ecf20Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 20658c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 20668c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 20678c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 20688c2ecf20Sopenharmony_ci int err; 20698c2ecf20Sopenharmony_ci 20708c2ecf20Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 20718c2ecf20Sopenharmony_ci 20728c2ecf20Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 20738c2ecf20Sopenharmony_ci drbd_set_in_sync(device, sector, peer_req->i.size); 20748c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); 20758c2ecf20Sopenharmony_ci } else { 20768c2ecf20Sopenharmony_ci /* Record failure to sync */ 20778c2ecf20Sopenharmony_ci drbd_rs_failed_io(device, sector, peer_req->i.size); 20788c2ecf20Sopenharmony_ci 20798c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 20808c2ecf20Sopenharmony_ci } 20818c2ecf20Sopenharmony_ci dec_unacked(device); 20828c2ecf20Sopenharmony_ci 20838c2ecf20Sopenharmony_ci return err; 20848c2ecf20Sopenharmony_ci} 20858c2ecf20Sopenharmony_ci 20868c2ecf20Sopenharmony_cistatic int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, 20878c2ecf20Sopenharmony_ci struct packet_info *pi) __releases(local) 20888c2ecf20Sopenharmony_ci{ 20898c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 20908c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 20918c2ecf20Sopenharmony_ci 20928c2ecf20Sopenharmony_ci peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi); 20938c2ecf20Sopenharmony_ci if (!peer_req) 20948c2ecf20Sopenharmony_ci goto fail; 
20958c2ecf20Sopenharmony_ci 20968c2ecf20Sopenharmony_ci dec_rs_pending(device); 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_ci inc_unacked(device); 20998c2ecf20Sopenharmony_ci /* corresponding dec_unacked() in e_end_resync_block() 21008c2ecf20Sopenharmony_ci * respective _drbd_clear_done_ee */ 21018c2ecf20Sopenharmony_ci 21028c2ecf20Sopenharmony_ci peer_req->w.cb = e_end_resync_block; 21038c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 21068c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->sync_ee); 21078c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 21088c2ecf20Sopenharmony_ci 21098c2ecf20Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_ev); 21108c2ecf20Sopenharmony_ci if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0, 21118c2ecf20Sopenharmony_ci DRBD_FAULT_RS_WR) == 0) 21128c2ecf20Sopenharmony_ci return 0; 21138c2ecf20Sopenharmony_ci 21148c2ecf20Sopenharmony_ci /* don't care for the reason here */ 21158c2ecf20Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 21168c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 21178c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 21188c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 21198c2ecf20Sopenharmony_ci 21208c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 21218c2ecf20Sopenharmony_cifail: 21228c2ecf20Sopenharmony_ci put_ldev(device); 21238c2ecf20Sopenharmony_ci return -EIO; 21248c2ecf20Sopenharmony_ci} 21258c2ecf20Sopenharmony_ci 21268c2ecf20Sopenharmony_cistatic struct drbd_request * 21278c2ecf20Sopenharmony_cifind_request(struct drbd_device *device, struct rb_root *root, u64 id, 21288c2ecf20Sopenharmony_ci sector_t sector, bool missing_ok, const char *func) 21298c2ecf20Sopenharmony_ci{ 21308c2ecf20Sopenharmony_ci struct drbd_request *req; 21318c2ecf20Sopenharmony_ci 
21328c2ecf20Sopenharmony_ci /* Request object according to our peer */ 21338c2ecf20Sopenharmony_ci req = (struct drbd_request *)(unsigned long)id; 21348c2ecf20Sopenharmony_ci if (drbd_contains_interval(root, sector, &req->i) && req->i.local) 21358c2ecf20Sopenharmony_ci return req; 21368c2ecf20Sopenharmony_ci if (!missing_ok) { 21378c2ecf20Sopenharmony_ci drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func, 21388c2ecf20Sopenharmony_ci (unsigned long)id, (unsigned long long)sector); 21398c2ecf20Sopenharmony_ci } 21408c2ecf20Sopenharmony_ci return NULL; 21418c2ecf20Sopenharmony_ci} 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_cistatic int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi) 21448c2ecf20Sopenharmony_ci{ 21458c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 21468c2ecf20Sopenharmony_ci struct drbd_device *device; 21478c2ecf20Sopenharmony_ci struct drbd_request *req; 21488c2ecf20Sopenharmony_ci sector_t sector; 21498c2ecf20Sopenharmony_ci int err; 21508c2ecf20Sopenharmony_ci struct p_data *p = pi->data; 21518c2ecf20Sopenharmony_ci 21528c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 21538c2ecf20Sopenharmony_ci if (!peer_device) 21548c2ecf20Sopenharmony_ci return -EIO; 21558c2ecf20Sopenharmony_ci device = peer_device->device; 21568c2ecf20Sopenharmony_ci 21578c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 21588c2ecf20Sopenharmony_ci 21598c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 21608c2ecf20Sopenharmony_ci req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__); 21618c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 21628c2ecf20Sopenharmony_ci if (unlikely(!req)) 21638c2ecf20Sopenharmony_ci return -EIO; 21648c2ecf20Sopenharmony_ci 21658c2ecf20Sopenharmony_ci /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid 21668c2ecf20Sopenharmony_ci * special casing it there 
for the various failure cases. 21678c2ecf20Sopenharmony_ci * still no race with drbd_fail_pending_reads */ 21688c2ecf20Sopenharmony_ci err = recv_dless_read(peer_device, req, sector, pi->size); 21698c2ecf20Sopenharmony_ci if (!err) 21708c2ecf20Sopenharmony_ci req_mod(req, DATA_RECEIVED); 21718c2ecf20Sopenharmony_ci /* else: nothing. handled from drbd_disconnect... 21728c2ecf20Sopenharmony_ci * I don't think we may complete this just yet 21738c2ecf20Sopenharmony_ci * in case we are "on-disconnect: freeze" */ 21748c2ecf20Sopenharmony_ci 21758c2ecf20Sopenharmony_ci return err; 21768c2ecf20Sopenharmony_ci} 21778c2ecf20Sopenharmony_ci 21788c2ecf20Sopenharmony_cistatic int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi) 21798c2ecf20Sopenharmony_ci{ 21808c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 21818c2ecf20Sopenharmony_ci struct drbd_device *device; 21828c2ecf20Sopenharmony_ci sector_t sector; 21838c2ecf20Sopenharmony_ci int err; 21848c2ecf20Sopenharmony_ci struct p_data *p = pi->data; 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 21878c2ecf20Sopenharmony_ci if (!peer_device) 21888c2ecf20Sopenharmony_ci return -EIO; 21898c2ecf20Sopenharmony_ci device = peer_device->device; 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 21928c2ecf20Sopenharmony_ci D_ASSERT(device, p->block_id == ID_SYNCER); 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci if (get_ldev(device)) { 21958c2ecf20Sopenharmony_ci /* data is submitted to disk within recv_resync_read. 21968c2ecf20Sopenharmony_ci * corresponding put_ldev done below on error, 21978c2ecf20Sopenharmony_ci * or in drbd_peer_request_endio. 
*/ 21988c2ecf20Sopenharmony_ci err = recv_resync_read(peer_device, sector, pi); 21998c2ecf20Sopenharmony_ci } else { 22008c2ecf20Sopenharmony_ci if (__ratelimit(&drbd_ratelimit_state)) 22018c2ecf20Sopenharmony_ci drbd_err(device, "Can not write resync data to local disk.\n"); 22028c2ecf20Sopenharmony_ci 22038c2ecf20Sopenharmony_ci err = drbd_drain_block(peer_device, pi->size); 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 22068c2ecf20Sopenharmony_ci } 22078c2ecf20Sopenharmony_ci 22088c2ecf20Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_in); 22098c2ecf20Sopenharmony_ci 22108c2ecf20Sopenharmony_ci return err; 22118c2ecf20Sopenharmony_ci} 22128c2ecf20Sopenharmony_ci 22138c2ecf20Sopenharmony_cistatic void restart_conflicting_writes(struct drbd_device *device, 22148c2ecf20Sopenharmony_ci sector_t sector, int size) 22158c2ecf20Sopenharmony_ci{ 22168c2ecf20Sopenharmony_ci struct drbd_interval *i; 22178c2ecf20Sopenharmony_ci struct drbd_request *req; 22188c2ecf20Sopenharmony_ci 22198c2ecf20Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 22208c2ecf20Sopenharmony_ci if (!i->local) 22218c2ecf20Sopenharmony_ci continue; 22228c2ecf20Sopenharmony_ci req = container_of(i, struct drbd_request, i); 22238c2ecf20Sopenharmony_ci if (req->rq_state & RQ_LOCAL_PENDING || 22248c2ecf20Sopenharmony_ci !(req->rq_state & RQ_POSTPONED)) 22258c2ecf20Sopenharmony_ci continue; 22268c2ecf20Sopenharmony_ci /* as it is RQ_POSTPONED, this will cause it to 22278c2ecf20Sopenharmony_ci * be queued on the retry workqueue. */ 22288c2ecf20Sopenharmony_ci __req_mod(req, CONFLICT_RESOLVED, NULL); 22298c2ecf20Sopenharmony_ci } 22308c2ecf20Sopenharmony_ci} 22318c2ecf20Sopenharmony_ci 22328c2ecf20Sopenharmony_ci/* 22338c2ecf20Sopenharmony_ci * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs(). 
22348c2ecf20Sopenharmony_ci */ 22358c2ecf20Sopenharmony_cistatic int e_end_block(struct drbd_work *w, int cancel) 22368c2ecf20Sopenharmony_ci{ 22378c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = 22388c2ecf20Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 22398c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 22408c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 22418c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 22428c2ecf20Sopenharmony_ci int err = 0, pcmd; 22438c2ecf20Sopenharmony_ci 22448c2ecf20Sopenharmony_ci if (peer_req->flags & EE_SEND_WRITE_ACK) { 22458c2ecf20Sopenharmony_ci if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 22468c2ecf20Sopenharmony_ci pcmd = (device->state.conn >= C_SYNC_SOURCE && 22478c2ecf20Sopenharmony_ci device->state.conn <= C_PAUSED_SYNC_T && 22488c2ecf20Sopenharmony_ci peer_req->flags & EE_MAY_SET_IN_SYNC) ? 22498c2ecf20Sopenharmony_ci P_RS_WRITE_ACK : P_WRITE_ACK; 22508c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, pcmd, peer_req); 22518c2ecf20Sopenharmony_ci if (pcmd == P_RS_WRITE_ACK) 22528c2ecf20Sopenharmony_ci drbd_set_in_sync(device, sector, peer_req->i.size); 22538c2ecf20Sopenharmony_ci } else { 22548c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 22558c2ecf20Sopenharmony_ci /* we expect it to be marked out of sync anyways... 22568c2ecf20Sopenharmony_ci * maybe assert this? */ 22578c2ecf20Sopenharmony_ci } 22588c2ecf20Sopenharmony_ci dec_unacked(device); 22598c2ecf20Sopenharmony_ci } 22608c2ecf20Sopenharmony_ci 22618c2ecf20Sopenharmony_ci /* we delete from the conflict detection hash _after_ we sent out the 22628c2ecf20Sopenharmony_ci * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
*/ 22638c2ecf20Sopenharmony_ci if (peer_req->flags & EE_IN_INTERVAL_TREE) { 22648c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 22658c2ecf20Sopenharmony_ci D_ASSERT(device, !drbd_interval_empty(&peer_req->i)); 22668c2ecf20Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 22678c2ecf20Sopenharmony_ci if (peer_req->flags & EE_RESTART_REQUESTS) 22688c2ecf20Sopenharmony_ci restart_conflicting_writes(device, sector, peer_req->i.size); 22698c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 22708c2ecf20Sopenharmony_ci } else 22718c2ecf20Sopenharmony_ci D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 22728c2ecf20Sopenharmony_ci 22738c2ecf20Sopenharmony_ci drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); 22748c2ecf20Sopenharmony_ci 22758c2ecf20Sopenharmony_ci return err; 22768c2ecf20Sopenharmony_ci} 22778c2ecf20Sopenharmony_ci 22788c2ecf20Sopenharmony_cistatic int e_send_ack(struct drbd_work *w, enum drbd_packet ack) 22798c2ecf20Sopenharmony_ci{ 22808c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = 22818c2ecf20Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 22828c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = peer_req->peer_device; 22838c2ecf20Sopenharmony_ci int err; 22848c2ecf20Sopenharmony_ci 22858c2ecf20Sopenharmony_ci err = drbd_send_ack(peer_device, ack, peer_req); 22868c2ecf20Sopenharmony_ci dec_unacked(peer_device->device); 22878c2ecf20Sopenharmony_ci 22888c2ecf20Sopenharmony_ci return err; 22898c2ecf20Sopenharmony_ci} 22908c2ecf20Sopenharmony_ci 22918c2ecf20Sopenharmony_cistatic int e_send_superseded(struct drbd_work *w, int unused) 22928c2ecf20Sopenharmony_ci{ 22938c2ecf20Sopenharmony_ci return e_send_ack(w, P_SUPERSEDED); 22948c2ecf20Sopenharmony_ci} 22958c2ecf20Sopenharmony_ci 22968c2ecf20Sopenharmony_cistatic int e_send_retry_write(struct drbd_work *w, int unused) 22978c2ecf20Sopenharmony_ci{ 
22988c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req = 22998c2ecf20Sopenharmony_ci container_of(w, struct drbd_peer_request, w); 23008c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_req->peer_device->connection; 23018c2ecf20Sopenharmony_ci 23028c2ecf20Sopenharmony_ci return e_send_ack(w, connection->agreed_pro_version >= 100 ? 23038c2ecf20Sopenharmony_ci P_RETRY_WRITE : P_SUPERSEDED); 23048c2ecf20Sopenharmony_ci} 23058c2ecf20Sopenharmony_ci 23068c2ecf20Sopenharmony_cistatic bool seq_greater(u32 a, u32 b) 23078c2ecf20Sopenharmony_ci{ 23088c2ecf20Sopenharmony_ci /* 23098c2ecf20Sopenharmony_ci * We assume 32-bit wrap-around here. 23108c2ecf20Sopenharmony_ci * For 24-bit wrap-around, we would have to shift: 23118c2ecf20Sopenharmony_ci * a <<= 8; b <<= 8; 23128c2ecf20Sopenharmony_ci */ 23138c2ecf20Sopenharmony_ci return (s32)a - (s32)b > 0; 23148c2ecf20Sopenharmony_ci} 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_cistatic u32 seq_max(u32 a, u32 b) 23178c2ecf20Sopenharmony_ci{ 23188c2ecf20Sopenharmony_ci return seq_greater(a, b) ? 
a : b; 23198c2ecf20Sopenharmony_ci} 23208c2ecf20Sopenharmony_ci 23218c2ecf20Sopenharmony_cistatic void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq) 23228c2ecf20Sopenharmony_ci{ 23238c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 23248c2ecf20Sopenharmony_ci unsigned int newest_peer_seq; 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) { 23278c2ecf20Sopenharmony_ci spin_lock(&device->peer_seq_lock); 23288c2ecf20Sopenharmony_ci newest_peer_seq = seq_max(device->peer_seq, peer_seq); 23298c2ecf20Sopenharmony_ci device->peer_seq = newest_peer_seq; 23308c2ecf20Sopenharmony_ci spin_unlock(&device->peer_seq_lock); 23318c2ecf20Sopenharmony_ci /* wake up only if we actually changed device->peer_seq */ 23328c2ecf20Sopenharmony_ci if (peer_seq == newest_peer_seq) 23338c2ecf20Sopenharmony_ci wake_up(&device->seq_wait); 23348c2ecf20Sopenharmony_ci } 23358c2ecf20Sopenharmony_ci} 23368c2ecf20Sopenharmony_ci 23378c2ecf20Sopenharmony_cistatic inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) 23388c2ecf20Sopenharmony_ci{ 23398c2ecf20Sopenharmony_ci return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); 23408c2ecf20Sopenharmony_ci} 23418c2ecf20Sopenharmony_ci 23428c2ecf20Sopenharmony_ci/* maybe change sync_ee into interval trees as well? 
*/ 23438c2ecf20Sopenharmony_cistatic bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) 23448c2ecf20Sopenharmony_ci{ 23458c2ecf20Sopenharmony_ci struct drbd_peer_request *rs_req; 23468c2ecf20Sopenharmony_ci bool rv = false; 23478c2ecf20Sopenharmony_ci 23488c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 23498c2ecf20Sopenharmony_ci list_for_each_entry(rs_req, &device->sync_ee, w.list) { 23508c2ecf20Sopenharmony_ci if (overlaps(peer_req->i.sector, peer_req->i.size, 23518c2ecf20Sopenharmony_ci rs_req->i.sector, rs_req->i.size)) { 23528c2ecf20Sopenharmony_ci rv = true; 23538c2ecf20Sopenharmony_ci break; 23548c2ecf20Sopenharmony_ci } 23558c2ecf20Sopenharmony_ci } 23568c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 23578c2ecf20Sopenharmony_ci 23588c2ecf20Sopenharmony_ci return rv; 23598c2ecf20Sopenharmony_ci} 23608c2ecf20Sopenharmony_ci 23618c2ecf20Sopenharmony_ci/* Called from receive_Data. 23628c2ecf20Sopenharmony_ci * Synchronize packets on sock with packets on msock. 23638c2ecf20Sopenharmony_ci * 23648c2ecf20Sopenharmony_ci * This is here so even when a P_DATA packet traveling via sock overtook an Ack 23658c2ecf20Sopenharmony_ci * packet traveling on msock, they are still processed in the order they have 23668c2ecf20Sopenharmony_ci * been sent. 23678c2ecf20Sopenharmony_ci * 23688c2ecf20Sopenharmony_ci * Note: we don't care for Ack packets overtaking P_DATA packets. 23698c2ecf20Sopenharmony_ci * 23708c2ecf20Sopenharmony_ci * In case packet_seq is larger than device->peer_seq number, there are 23718c2ecf20Sopenharmony_ci * outstanding packets on the msock. We wait for them to arrive. 23728c2ecf20Sopenharmony_ci * In case we are the logically next packet, we update device->peer_seq 23738c2ecf20Sopenharmony_ci * ourselves. Correctly handles 32bit wrap around. 
23748c2ecf20Sopenharmony_ci * 23758c2ecf20Sopenharmony_ci * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, 23768c2ecf20Sopenharmony_ci * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds 23778c2ecf20Sopenharmony_ci * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have 23788c2ecf20Sopenharmony_ci * 1<<9 == 512 seconds aka ages for the 32bit wrap around... 23798c2ecf20Sopenharmony_ci * 23808c2ecf20Sopenharmony_ci * returns 0 if we may process the packet, 23818c2ecf20Sopenharmony_ci * -ERESTARTSYS if we were interrupted (by disconnect signal). */ 23828c2ecf20Sopenharmony_cistatic int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq) 23838c2ecf20Sopenharmony_ci{ 23848c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 23858c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 23868c2ecf20Sopenharmony_ci long timeout; 23878c2ecf20Sopenharmony_ci int ret = 0, tp; 23888c2ecf20Sopenharmony_ci 23898c2ecf20Sopenharmony_ci if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) 23908c2ecf20Sopenharmony_ci return 0; 23918c2ecf20Sopenharmony_ci 23928c2ecf20Sopenharmony_ci spin_lock(&device->peer_seq_lock); 23938c2ecf20Sopenharmony_ci for (;;) { 23948c2ecf20Sopenharmony_ci if (!seq_greater(peer_seq - 1, device->peer_seq)) { 23958c2ecf20Sopenharmony_ci device->peer_seq = seq_max(device->peer_seq, peer_seq); 23968c2ecf20Sopenharmony_ci break; 23978c2ecf20Sopenharmony_ci } 23988c2ecf20Sopenharmony_ci 23998c2ecf20Sopenharmony_ci if (signal_pending(current)) { 24008c2ecf20Sopenharmony_ci ret = -ERESTARTSYS; 24018c2ecf20Sopenharmony_ci break; 24028c2ecf20Sopenharmony_ci } 24038c2ecf20Sopenharmony_ci 24048c2ecf20Sopenharmony_ci rcu_read_lock(); 24058c2ecf20Sopenharmony_ci tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; 24068c2ecf20Sopenharmony_ci rcu_read_unlock(); 24078c2ecf20Sopenharmony_ci 24088c2ecf20Sopenharmony_ci if (!tp) 
24098c2ecf20Sopenharmony_ci break; 24108c2ecf20Sopenharmony_ci 24118c2ecf20Sopenharmony_ci /* Only need to wait if two_primaries is enabled */ 24128c2ecf20Sopenharmony_ci prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE); 24138c2ecf20Sopenharmony_ci spin_unlock(&device->peer_seq_lock); 24148c2ecf20Sopenharmony_ci rcu_read_lock(); 24158c2ecf20Sopenharmony_ci timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10; 24168c2ecf20Sopenharmony_ci rcu_read_unlock(); 24178c2ecf20Sopenharmony_ci timeout = schedule_timeout(timeout); 24188c2ecf20Sopenharmony_ci spin_lock(&device->peer_seq_lock); 24198c2ecf20Sopenharmony_ci if (!timeout) { 24208c2ecf20Sopenharmony_ci ret = -ETIMEDOUT; 24218c2ecf20Sopenharmony_ci drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n"); 24228c2ecf20Sopenharmony_ci break; 24238c2ecf20Sopenharmony_ci } 24248c2ecf20Sopenharmony_ci } 24258c2ecf20Sopenharmony_ci spin_unlock(&device->peer_seq_lock); 24268c2ecf20Sopenharmony_ci finish_wait(&device->seq_wait, &wait); 24278c2ecf20Sopenharmony_ci return ret; 24288c2ecf20Sopenharmony_ci} 24298c2ecf20Sopenharmony_ci 24308c2ecf20Sopenharmony_ci/* see also bio_flags_to_wire() 24318c2ecf20Sopenharmony_ci * DRBD_REQ_*, because we need to semantically map the flags to data packet 24328c2ecf20Sopenharmony_ci * flags and back. We may replicate to other kernel versions. */ 24338c2ecf20Sopenharmony_cistatic unsigned long wire_flags_to_bio_flags(u32 dpf) 24348c2ecf20Sopenharmony_ci{ 24358c2ecf20Sopenharmony_ci return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 24368c2ecf20Sopenharmony_ci (dpf & DP_FUA ? REQ_FUA : 0) | 24378c2ecf20Sopenharmony_ci (dpf & DP_FLUSH ? 
REQ_PREFLUSH : 0); 24388c2ecf20Sopenharmony_ci} 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_cistatic unsigned long wire_flags_to_bio_op(u32 dpf) 24418c2ecf20Sopenharmony_ci{ 24428c2ecf20Sopenharmony_ci if (dpf & DP_ZEROES) 24438c2ecf20Sopenharmony_ci return REQ_OP_WRITE_ZEROES; 24448c2ecf20Sopenharmony_ci if (dpf & DP_DISCARD) 24458c2ecf20Sopenharmony_ci return REQ_OP_DISCARD; 24468c2ecf20Sopenharmony_ci if (dpf & DP_WSAME) 24478c2ecf20Sopenharmony_ci return REQ_OP_WRITE_SAME; 24488c2ecf20Sopenharmony_ci else 24498c2ecf20Sopenharmony_ci return REQ_OP_WRITE; 24508c2ecf20Sopenharmony_ci} 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_cistatic void fail_postponed_requests(struct drbd_device *device, sector_t sector, 24538c2ecf20Sopenharmony_ci unsigned int size) 24548c2ecf20Sopenharmony_ci{ 24558c2ecf20Sopenharmony_ci struct drbd_interval *i; 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci repeat: 24588c2ecf20Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 24598c2ecf20Sopenharmony_ci struct drbd_request *req; 24608c2ecf20Sopenharmony_ci struct bio_and_error m; 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci if (!i->local) 24638c2ecf20Sopenharmony_ci continue; 24648c2ecf20Sopenharmony_ci req = container_of(i, struct drbd_request, i); 24658c2ecf20Sopenharmony_ci if (!(req->rq_state & RQ_POSTPONED)) 24668c2ecf20Sopenharmony_ci continue; 24678c2ecf20Sopenharmony_ci req->rq_state &= ~RQ_POSTPONED; 24688c2ecf20Sopenharmony_ci __req_mod(req, NEG_ACKED, &m); 24698c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 24708c2ecf20Sopenharmony_ci if (m.bio) 24718c2ecf20Sopenharmony_ci complete_master_bio(device, &m); 24728c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 24738c2ecf20Sopenharmony_ci goto repeat; 24748c2ecf20Sopenharmony_ci } 24758c2ecf20Sopenharmony_ci} 24768c2ecf20Sopenharmony_ci 24778c2ecf20Sopenharmony_cistatic int handle_write_conflicts(struct drbd_device *device, 
24788c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req) 24798c2ecf20Sopenharmony_ci{ 24808c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_req->peer_device->connection; 24818c2ecf20Sopenharmony_ci bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags); 24828c2ecf20Sopenharmony_ci sector_t sector = peer_req->i.sector; 24838c2ecf20Sopenharmony_ci const unsigned int size = peer_req->i.size; 24848c2ecf20Sopenharmony_ci struct drbd_interval *i; 24858c2ecf20Sopenharmony_ci bool equal; 24868c2ecf20Sopenharmony_ci int err; 24878c2ecf20Sopenharmony_ci 24888c2ecf20Sopenharmony_ci /* 24898c2ecf20Sopenharmony_ci * Inserting the peer request into the write_requests tree will prevent 24908c2ecf20Sopenharmony_ci * new conflicting local requests from being added. 24918c2ecf20Sopenharmony_ci */ 24928c2ecf20Sopenharmony_ci drbd_insert_interval(&device->write_requests, &peer_req->i); 24938c2ecf20Sopenharmony_ci 24948c2ecf20Sopenharmony_ci repeat: 24958c2ecf20Sopenharmony_ci drbd_for_each_overlap(i, &device->write_requests, sector, size) { 24968c2ecf20Sopenharmony_ci if (i == &peer_req->i) 24978c2ecf20Sopenharmony_ci continue; 24988c2ecf20Sopenharmony_ci if (i->completed) 24998c2ecf20Sopenharmony_ci continue; 25008c2ecf20Sopenharmony_ci 25018c2ecf20Sopenharmony_ci if (!i->local) { 25028c2ecf20Sopenharmony_ci /* 25038c2ecf20Sopenharmony_ci * Our peer has sent a conflicting remote request; this 25048c2ecf20Sopenharmony_ci * should not happen in a two-node setup. Wait for the 25058c2ecf20Sopenharmony_ci * earlier peer request to complete. 
25068c2ecf20Sopenharmony_ci */ 25078c2ecf20Sopenharmony_ci err = drbd_wait_misc(device, i); 25088c2ecf20Sopenharmony_ci if (err) 25098c2ecf20Sopenharmony_ci goto out; 25108c2ecf20Sopenharmony_ci goto repeat; 25118c2ecf20Sopenharmony_ci } 25128c2ecf20Sopenharmony_ci 25138c2ecf20Sopenharmony_ci equal = i->sector == sector && i->size == size; 25148c2ecf20Sopenharmony_ci if (resolve_conflicts) { 25158c2ecf20Sopenharmony_ci /* 25168c2ecf20Sopenharmony_ci * If the peer request is fully contained within the 25178c2ecf20Sopenharmony_ci * overlapping request, it can be considered overwritten 25188c2ecf20Sopenharmony_ci * and thus superseded; otherwise, it will be retried 25198c2ecf20Sopenharmony_ci * once all overlapping requests have completed. 25208c2ecf20Sopenharmony_ci */ 25218c2ecf20Sopenharmony_ci bool superseded = i->sector <= sector && i->sector + 25228c2ecf20Sopenharmony_ci (i->size >> 9) >= sector + (size >> 9); 25238c2ecf20Sopenharmony_ci 25248c2ecf20Sopenharmony_ci if (!equal) 25258c2ecf20Sopenharmony_ci drbd_alert(device, "Concurrent writes detected: " 25268c2ecf20Sopenharmony_ci "local=%llus +%u, remote=%llus +%u, " 25278c2ecf20Sopenharmony_ci "assuming %s came first\n", 25288c2ecf20Sopenharmony_ci (unsigned long long)i->sector, i->size, 25298c2ecf20Sopenharmony_ci (unsigned long long)sector, size, 25308c2ecf20Sopenharmony_ci superseded ? "local" : "remote"); 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ci peer_req->w.cb = superseded ? 
e_send_superseded : 25338c2ecf20Sopenharmony_ci e_send_retry_write; 25348c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->done_ee); 25358c2ecf20Sopenharmony_ci queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work); 25368c2ecf20Sopenharmony_ci 25378c2ecf20Sopenharmony_ci err = -ENOENT; 25388c2ecf20Sopenharmony_ci goto out; 25398c2ecf20Sopenharmony_ci } else { 25408c2ecf20Sopenharmony_ci struct drbd_request *req = 25418c2ecf20Sopenharmony_ci container_of(i, struct drbd_request, i); 25428c2ecf20Sopenharmony_ci 25438c2ecf20Sopenharmony_ci if (!equal) 25448c2ecf20Sopenharmony_ci drbd_alert(device, "Concurrent writes detected: " 25458c2ecf20Sopenharmony_ci "local=%llus +%u, remote=%llus +%u\n", 25468c2ecf20Sopenharmony_ci (unsigned long long)i->sector, i->size, 25478c2ecf20Sopenharmony_ci (unsigned long long)sector, size); 25488c2ecf20Sopenharmony_ci 25498c2ecf20Sopenharmony_ci if (req->rq_state & RQ_LOCAL_PENDING || 25508c2ecf20Sopenharmony_ci !(req->rq_state & RQ_POSTPONED)) { 25518c2ecf20Sopenharmony_ci /* 25528c2ecf20Sopenharmony_ci * Wait for the node with the discard flag to 25538c2ecf20Sopenharmony_ci * decide if this request has been superseded 25548c2ecf20Sopenharmony_ci * or needs to be retried. 25558c2ecf20Sopenharmony_ci * Requests that have been superseded will 25568c2ecf20Sopenharmony_ci * disappear from the write_requests tree. 25578c2ecf20Sopenharmony_ci * 25588c2ecf20Sopenharmony_ci * In addition, wait for the conflicting 25598c2ecf20Sopenharmony_ci * request to finish locally before submitting 25608c2ecf20Sopenharmony_ci * the conflicting peer request. 
25618c2ecf20Sopenharmony_ci */ 25628c2ecf20Sopenharmony_ci err = drbd_wait_misc(device, &req->i); 25638c2ecf20Sopenharmony_ci if (err) { 25648c2ecf20Sopenharmony_ci _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 25658c2ecf20Sopenharmony_ci fail_postponed_requests(device, sector, size); 25668c2ecf20Sopenharmony_ci goto out; 25678c2ecf20Sopenharmony_ci } 25688c2ecf20Sopenharmony_ci goto repeat; 25698c2ecf20Sopenharmony_ci } 25708c2ecf20Sopenharmony_ci /* 25718c2ecf20Sopenharmony_ci * Remember to restart the conflicting requests after 25728c2ecf20Sopenharmony_ci * the new peer request has completed. 25738c2ecf20Sopenharmony_ci */ 25748c2ecf20Sopenharmony_ci peer_req->flags |= EE_RESTART_REQUESTS; 25758c2ecf20Sopenharmony_ci } 25768c2ecf20Sopenharmony_ci } 25778c2ecf20Sopenharmony_ci err = 0; 25788c2ecf20Sopenharmony_ci 25798c2ecf20Sopenharmony_ci out: 25808c2ecf20Sopenharmony_ci if (err) 25818c2ecf20Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 25828c2ecf20Sopenharmony_ci return err; 25838c2ecf20Sopenharmony_ci} 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci/* mirrored write */ 25868c2ecf20Sopenharmony_cistatic int receive_Data(struct drbd_connection *connection, struct packet_info *pi) 25878c2ecf20Sopenharmony_ci{ 25888c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 25898c2ecf20Sopenharmony_ci struct drbd_device *device; 25908c2ecf20Sopenharmony_ci struct net_conf *nc; 25918c2ecf20Sopenharmony_ci sector_t sector; 25928c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 25938c2ecf20Sopenharmony_ci struct p_data *p = pi->data; 25948c2ecf20Sopenharmony_ci u32 peer_seq = be32_to_cpu(p->seq_num); 25958c2ecf20Sopenharmony_ci int op, op_flags; 25968c2ecf20Sopenharmony_ci u32 dp_flags; 25978c2ecf20Sopenharmony_ci int err, tp; 25988c2ecf20Sopenharmony_ci 25998c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 26008c2ecf20Sopenharmony_ci if (!peer_device) 26018c2ecf20Sopenharmony_ci 
return -EIO; 26028c2ecf20Sopenharmony_ci device = peer_device->device; 26038c2ecf20Sopenharmony_ci 26048c2ecf20Sopenharmony_ci if (!get_ldev(device)) { 26058c2ecf20Sopenharmony_ci int err2; 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci err = wait_for_and_update_peer_seq(peer_device, peer_seq); 26088c2ecf20Sopenharmony_ci drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 26098c2ecf20Sopenharmony_ci atomic_inc(&connection->current_epoch->epoch_size); 26108c2ecf20Sopenharmony_ci err2 = drbd_drain_block(peer_device, pi->size); 26118c2ecf20Sopenharmony_ci if (!err) 26128c2ecf20Sopenharmony_ci err = err2; 26138c2ecf20Sopenharmony_ci return err; 26148c2ecf20Sopenharmony_ci } 26158c2ecf20Sopenharmony_ci 26168c2ecf20Sopenharmony_ci /* 26178c2ecf20Sopenharmony_ci * Corresponding put_ldev done either below (on various errors), or in 26188c2ecf20Sopenharmony_ci * drbd_peer_request_endio, if we successfully submit the data at the 26198c2ecf20Sopenharmony_ci * end of this function. 26208c2ecf20Sopenharmony_ci */ 26218c2ecf20Sopenharmony_ci 26228c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 26238c2ecf20Sopenharmony_ci peer_req = read_in_block(peer_device, p->block_id, sector, pi); 26248c2ecf20Sopenharmony_ci if (!peer_req) { 26258c2ecf20Sopenharmony_ci put_ldev(device); 26268c2ecf20Sopenharmony_ci return -EIO; 26278c2ecf20Sopenharmony_ci } 26288c2ecf20Sopenharmony_ci 26298c2ecf20Sopenharmony_ci peer_req->w.cb = e_end_block; 26308c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 26318c2ecf20Sopenharmony_ci peer_req->flags |= EE_APPLICATION; 26328c2ecf20Sopenharmony_ci 26338c2ecf20Sopenharmony_ci dp_flags = be32_to_cpu(p->dp_flags); 26348c2ecf20Sopenharmony_ci op = wire_flags_to_bio_op(dp_flags); 26358c2ecf20Sopenharmony_ci op_flags = wire_flags_to_bio_flags(dp_flags); 26368c2ecf20Sopenharmony_ci if (pi->cmd == P_TRIM) { 26378c2ecf20Sopenharmony_ci D_ASSERT(peer_device, peer_req->i.size > 0); 26388c2ecf20Sopenharmony_ci D_ASSERT(peer_device, op == 
REQ_OP_DISCARD); 26398c2ecf20Sopenharmony_ci D_ASSERT(peer_device, peer_req->pages == NULL); 26408c2ecf20Sopenharmony_ci /* need to play safe: an older DRBD sender 26418c2ecf20Sopenharmony_ci * may mean zero-out while sending P_TRIM. */ 26428c2ecf20Sopenharmony_ci if (0 == (connection->agreed_features & DRBD_FF_WZEROES)) 26438c2ecf20Sopenharmony_ci peer_req->flags |= EE_ZEROOUT; 26448c2ecf20Sopenharmony_ci } else if (pi->cmd == P_ZEROES) { 26458c2ecf20Sopenharmony_ci D_ASSERT(peer_device, peer_req->i.size > 0); 26468c2ecf20Sopenharmony_ci D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); 26478c2ecf20Sopenharmony_ci D_ASSERT(peer_device, peer_req->pages == NULL); 26488c2ecf20Sopenharmony_ci /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */ 26498c2ecf20Sopenharmony_ci if (dp_flags & DP_DISCARD) 26508c2ecf20Sopenharmony_ci peer_req->flags |= EE_TRIM; 26518c2ecf20Sopenharmony_ci } else if (peer_req->pages == NULL) { 26528c2ecf20Sopenharmony_ci D_ASSERT(device, peer_req->i.size == 0); 26538c2ecf20Sopenharmony_ci D_ASSERT(device, dp_flags & DP_FLUSH); 26548c2ecf20Sopenharmony_ci } 26558c2ecf20Sopenharmony_ci 26568c2ecf20Sopenharmony_ci if (dp_flags & DP_MAY_SET_IN_SYNC) 26578c2ecf20Sopenharmony_ci peer_req->flags |= EE_MAY_SET_IN_SYNC; 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci spin_lock(&connection->epoch_lock); 26608c2ecf20Sopenharmony_ci peer_req->epoch = connection->current_epoch; 26618c2ecf20Sopenharmony_ci atomic_inc(&peer_req->epoch->epoch_size); 26628c2ecf20Sopenharmony_ci atomic_inc(&peer_req->epoch->active); 26638c2ecf20Sopenharmony_ci spin_unlock(&connection->epoch_lock); 26648c2ecf20Sopenharmony_ci 26658c2ecf20Sopenharmony_ci rcu_read_lock(); 26668c2ecf20Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 26678c2ecf20Sopenharmony_ci tp = nc->two_primaries; 26688c2ecf20Sopenharmony_ci if (peer_device->connection->agreed_pro_version < 100) { 26698c2ecf20Sopenharmony_ci switch (nc->wire_protocol) { 26708c2ecf20Sopenharmony_ci case 
DRBD_PROT_C: 26718c2ecf20Sopenharmony_ci dp_flags |= DP_SEND_WRITE_ACK; 26728c2ecf20Sopenharmony_ci break; 26738c2ecf20Sopenharmony_ci case DRBD_PROT_B: 26748c2ecf20Sopenharmony_ci dp_flags |= DP_SEND_RECEIVE_ACK; 26758c2ecf20Sopenharmony_ci break; 26768c2ecf20Sopenharmony_ci } 26778c2ecf20Sopenharmony_ci } 26788c2ecf20Sopenharmony_ci rcu_read_unlock(); 26798c2ecf20Sopenharmony_ci 26808c2ecf20Sopenharmony_ci if (dp_flags & DP_SEND_WRITE_ACK) { 26818c2ecf20Sopenharmony_ci peer_req->flags |= EE_SEND_WRITE_ACK; 26828c2ecf20Sopenharmony_ci inc_unacked(device); 26838c2ecf20Sopenharmony_ci /* corresponding dec_unacked() in e_end_block() 26848c2ecf20Sopenharmony_ci * respective _drbd_clear_done_ee */ 26858c2ecf20Sopenharmony_ci } 26868c2ecf20Sopenharmony_ci 26878c2ecf20Sopenharmony_ci if (dp_flags & DP_SEND_RECEIVE_ACK) { 26888c2ecf20Sopenharmony_ci /* I really don't like it that the receiver thread 26898c2ecf20Sopenharmony_ci * sends on the msock, but anyways */ 26908c2ecf20Sopenharmony_ci drbd_send_ack(peer_device, P_RECV_ACK, peer_req); 26918c2ecf20Sopenharmony_ci } 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci if (tp) { 26948c2ecf20Sopenharmony_ci /* two primaries implies protocol C */ 26958c2ecf20Sopenharmony_ci D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); 26968c2ecf20Sopenharmony_ci peer_req->flags |= EE_IN_INTERVAL_TREE; 26978c2ecf20Sopenharmony_ci err = wait_for_and_update_peer_seq(peer_device, peer_seq); 26988c2ecf20Sopenharmony_ci if (err) 26998c2ecf20Sopenharmony_ci goto out_interrupted; 27008c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 27018c2ecf20Sopenharmony_ci err = handle_write_conflicts(device, peer_req); 27028c2ecf20Sopenharmony_ci if (err) { 27038c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 27048c2ecf20Sopenharmony_ci if (err == -ENOENT) { 27058c2ecf20Sopenharmony_ci put_ldev(device); 27068c2ecf20Sopenharmony_ci return 0; 27078c2ecf20Sopenharmony_ci } 27088c2ecf20Sopenharmony_ci goto 
out_interrupted; 27098c2ecf20Sopenharmony_ci } 27108c2ecf20Sopenharmony_ci } else { 27118c2ecf20Sopenharmony_ci update_peer_seq(peer_device, peer_seq); 27128c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 27138c2ecf20Sopenharmony_ci } 27148c2ecf20Sopenharmony_ci /* TRIM and WRITE_SAME are processed synchronously, 27158c2ecf20Sopenharmony_ci * we wait for all pending requests, respectively wait for 27168c2ecf20Sopenharmony_ci * active_ee to become empty in drbd_submit_peer_request(); 27178c2ecf20Sopenharmony_ci * better not add ourselves here. */ 27188c2ecf20Sopenharmony_ci if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0) 27198c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->active_ee); 27208c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 27218c2ecf20Sopenharmony_ci 27228c2ecf20Sopenharmony_ci if (device->state.conn == C_SYNC_TARGET) 27238c2ecf20Sopenharmony_ci wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 27248c2ecf20Sopenharmony_ci 27258c2ecf20Sopenharmony_ci if (device->state.pdsk < D_INCONSISTENT) { 27268c2ecf20Sopenharmony_ci /* In case we have the only disk of the cluster, */ 27278c2ecf20Sopenharmony_ci drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 27288c2ecf20Sopenharmony_ci peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 27298c2ecf20Sopenharmony_ci drbd_al_begin_io(device, &peer_req->i); 27308c2ecf20Sopenharmony_ci peer_req->flags |= EE_CALL_AL_COMPLETE_IO; 27318c2ecf20Sopenharmony_ci } 27328c2ecf20Sopenharmony_ci 27338c2ecf20Sopenharmony_ci err = drbd_submit_peer_request(device, peer_req, op, op_flags, 27348c2ecf20Sopenharmony_ci DRBD_FAULT_DT_WR); 27358c2ecf20Sopenharmony_ci if (!err) 27368c2ecf20Sopenharmony_ci return 0; 27378c2ecf20Sopenharmony_ci 27388c2ecf20Sopenharmony_ci /* don't care for the reason here */ 27398c2ecf20Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 27408c2ecf20Sopenharmony_ci 
spin_lock_irq(&device->resource->req_lock); 27418c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 27428c2ecf20Sopenharmony_ci drbd_remove_epoch_entry_interval(device, peer_req); 27438c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 27448c2ecf20Sopenharmony_ci if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { 27458c2ecf20Sopenharmony_ci peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 27468c2ecf20Sopenharmony_ci drbd_al_complete_io(device, &peer_req->i); 27478c2ecf20Sopenharmony_ci } 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_ciout_interrupted: 27508c2ecf20Sopenharmony_ci drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP); 27518c2ecf20Sopenharmony_ci put_ldev(device); 27528c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 27538c2ecf20Sopenharmony_ci return err; 27548c2ecf20Sopenharmony_ci} 27558c2ecf20Sopenharmony_ci 27568c2ecf20Sopenharmony_ci/* We may throttle resync, if the lower device seems to be busy, 27578c2ecf20Sopenharmony_ci * and current sync rate is above c_min_rate. 27588c2ecf20Sopenharmony_ci * 27598c2ecf20Sopenharmony_ci * To decide whether or not the lower device is busy, we use a scheme similar 27608c2ecf20Sopenharmony_ci * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" 27618c2ecf20Sopenharmony_ci * (more than 64 sectors) of activity we cannot account for with our own resync 27628c2ecf20Sopenharmony_ci * activity, it obviously is "busy". 27638c2ecf20Sopenharmony_ci * 27648c2ecf20Sopenharmony_ci * The current sync rate used here uses only the most recent two step marks, 27658c2ecf20Sopenharmony_ci * to have a short time average so we can react faster. 
27668c2ecf20Sopenharmony_ci */ 27678c2ecf20Sopenharmony_cibool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, 27688c2ecf20Sopenharmony_ci bool throttle_if_app_is_waiting) 27698c2ecf20Sopenharmony_ci{ 27708c2ecf20Sopenharmony_ci struct lc_element *tmp; 27718c2ecf20Sopenharmony_ci bool throttle = drbd_rs_c_min_rate_throttle(device); 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_ci if (!throttle || throttle_if_app_is_waiting) 27748c2ecf20Sopenharmony_ci return throttle; 27758c2ecf20Sopenharmony_ci 27768c2ecf20Sopenharmony_ci spin_lock_irq(&device->al_lock); 27778c2ecf20Sopenharmony_ci tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); 27788c2ecf20Sopenharmony_ci if (tmp) { 27798c2ecf20Sopenharmony_ci struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 27808c2ecf20Sopenharmony_ci if (test_bit(BME_PRIORITY, &bm_ext->flags)) 27818c2ecf20Sopenharmony_ci throttle = false; 27828c2ecf20Sopenharmony_ci /* Do not slow down if app IO is already waiting for this extent, 27838c2ecf20Sopenharmony_ci * and our progress is necessary for application IO to complete. */ 27848c2ecf20Sopenharmony_ci } 27858c2ecf20Sopenharmony_ci spin_unlock_irq(&device->al_lock); 27868c2ecf20Sopenharmony_ci 27878c2ecf20Sopenharmony_ci return throttle; 27888c2ecf20Sopenharmony_ci} 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_cibool drbd_rs_c_min_rate_throttle(struct drbd_device *device) 27918c2ecf20Sopenharmony_ci{ 27928c2ecf20Sopenharmony_ci struct gendisk *disk = device->ldev->backing_bdev->bd_disk; 27938c2ecf20Sopenharmony_ci unsigned long db, dt, dbdt; 27948c2ecf20Sopenharmony_ci unsigned int c_min_rate; 27958c2ecf20Sopenharmony_ci int curr_events; 27968c2ecf20Sopenharmony_ci 27978c2ecf20Sopenharmony_ci rcu_read_lock(); 27988c2ecf20Sopenharmony_ci c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; 27998c2ecf20Sopenharmony_ci rcu_read_unlock(); 28008c2ecf20Sopenharmony_ci 28018c2ecf20Sopenharmony_ci /* feature disabled? 
*/ 28028c2ecf20Sopenharmony_ci if (c_min_rate == 0) 28038c2ecf20Sopenharmony_ci return false; 28048c2ecf20Sopenharmony_ci 28058c2ecf20Sopenharmony_ci curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - 28068c2ecf20Sopenharmony_ci atomic_read(&device->rs_sect_ev); 28078c2ecf20Sopenharmony_ci 28088c2ecf20Sopenharmony_ci if (atomic_read(&device->ap_actlog_cnt) 28098c2ecf20Sopenharmony_ci || curr_events - device->rs_last_events > 64) { 28108c2ecf20Sopenharmony_ci unsigned long rs_left; 28118c2ecf20Sopenharmony_ci int i; 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_ci device->rs_last_events = curr_events; 28148c2ecf20Sopenharmony_ci 28158c2ecf20Sopenharmony_ci /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, 28168c2ecf20Sopenharmony_ci * approx. */ 28178c2ecf20Sopenharmony_ci i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; 28188c2ecf20Sopenharmony_ci 28198c2ecf20Sopenharmony_ci if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 28208c2ecf20Sopenharmony_ci rs_left = device->ov_left; 28218c2ecf20Sopenharmony_ci else 28228c2ecf20Sopenharmony_ci rs_left = drbd_bm_total_weight(device) - device->rs_failed; 28238c2ecf20Sopenharmony_ci 28248c2ecf20Sopenharmony_ci dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ; 28258c2ecf20Sopenharmony_ci if (!dt) 28268c2ecf20Sopenharmony_ci dt++; 28278c2ecf20Sopenharmony_ci db = device->rs_mark_left[i] - rs_left; 28288c2ecf20Sopenharmony_ci dbdt = Bit2KB(db/dt); 28298c2ecf20Sopenharmony_ci 28308c2ecf20Sopenharmony_ci if (dbdt > c_min_rate) 28318c2ecf20Sopenharmony_ci return true; 28328c2ecf20Sopenharmony_ci } 28338c2ecf20Sopenharmony_ci return false; 28348c2ecf20Sopenharmony_ci} 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_cistatic int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) 28378c2ecf20Sopenharmony_ci{ 28388c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 28398c2ecf20Sopenharmony_ci struct drbd_device 
*device; 28408c2ecf20Sopenharmony_ci sector_t sector; 28418c2ecf20Sopenharmony_ci sector_t capacity; 28428c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 28438c2ecf20Sopenharmony_ci struct digest_info *di = NULL; 28448c2ecf20Sopenharmony_ci int size, verb; 28458c2ecf20Sopenharmony_ci unsigned int fault_type; 28468c2ecf20Sopenharmony_ci struct p_block_req *p = pi->data; 28478c2ecf20Sopenharmony_ci 28488c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 28498c2ecf20Sopenharmony_ci if (!peer_device) 28508c2ecf20Sopenharmony_ci return -EIO; 28518c2ecf20Sopenharmony_ci device = peer_device->device; 28528c2ecf20Sopenharmony_ci capacity = get_capacity(device->vdisk); 28538c2ecf20Sopenharmony_ci 28548c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 28558c2ecf20Sopenharmony_ci size = be32_to_cpu(p->blksize); 28568c2ecf20Sopenharmony_ci 28578c2ecf20Sopenharmony_ci if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { 28588c2ecf20Sopenharmony_ci drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 28598c2ecf20Sopenharmony_ci (unsigned long long)sector, size); 28608c2ecf20Sopenharmony_ci return -EINVAL; 28618c2ecf20Sopenharmony_ci } 28628c2ecf20Sopenharmony_ci if (sector + (size>>9) > capacity) { 28638c2ecf20Sopenharmony_ci drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 28648c2ecf20Sopenharmony_ci (unsigned long long)sector, size); 28658c2ecf20Sopenharmony_ci return -EINVAL; 28668c2ecf20Sopenharmony_ci } 28678c2ecf20Sopenharmony_ci 28688c2ecf20Sopenharmony_ci if (!get_ldev_if_state(device, D_UP_TO_DATE)) { 28698c2ecf20Sopenharmony_ci verb = 1; 28708c2ecf20Sopenharmony_ci switch (pi->cmd) { 28718c2ecf20Sopenharmony_ci case P_DATA_REQUEST: 28728c2ecf20Sopenharmony_ci drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); 28738c2ecf20Sopenharmony_ci break; 28748c2ecf20Sopenharmony_ci case P_RS_THIN_REQ: 28758c2ecf20Sopenharmony_ci case P_RS_DATA_REQUEST: 28768c2ecf20Sopenharmony_ci 
case P_CSUM_RS_REQUEST: 28778c2ecf20Sopenharmony_ci case P_OV_REQUEST: 28788c2ecf20Sopenharmony_ci drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p); 28798c2ecf20Sopenharmony_ci break; 28808c2ecf20Sopenharmony_ci case P_OV_REPLY: 28818c2ecf20Sopenharmony_ci verb = 0; 28828c2ecf20Sopenharmony_ci dec_rs_pending(device); 28838c2ecf20Sopenharmony_ci drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); 28848c2ecf20Sopenharmony_ci break; 28858c2ecf20Sopenharmony_ci default: 28868c2ecf20Sopenharmony_ci BUG(); 28878c2ecf20Sopenharmony_ci } 28888c2ecf20Sopenharmony_ci if (verb && __ratelimit(&drbd_ratelimit_state)) 28898c2ecf20Sopenharmony_ci drbd_err(device, "Can not satisfy peer's read request, " 28908c2ecf20Sopenharmony_ci "no local data.\n"); 28918c2ecf20Sopenharmony_ci 28928c2ecf20Sopenharmony_ci /* drain possibly payload */ 28938c2ecf20Sopenharmony_ci return drbd_drain_block(peer_device, pi->size); 28948c2ecf20Sopenharmony_ci } 28958c2ecf20Sopenharmony_ci 28968c2ecf20Sopenharmony_ci /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 28978c2ecf20Sopenharmony_ci * "criss-cross" setup, that might cause write-out on some other DRBD, 28988c2ecf20Sopenharmony_ci * which in turn might block on the other node at this very place. 
*/ 28998c2ecf20Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, 29008c2ecf20Sopenharmony_ci size, GFP_NOIO); 29018c2ecf20Sopenharmony_ci if (!peer_req) { 29028c2ecf20Sopenharmony_ci put_ldev(device); 29038c2ecf20Sopenharmony_ci return -ENOMEM; 29048c2ecf20Sopenharmony_ci } 29058c2ecf20Sopenharmony_ci 29068c2ecf20Sopenharmony_ci switch (pi->cmd) { 29078c2ecf20Sopenharmony_ci case P_DATA_REQUEST: 29088c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_end_data_req; 29098c2ecf20Sopenharmony_ci fault_type = DRBD_FAULT_DT_RD; 29108c2ecf20Sopenharmony_ci /* application IO, don't drbd_rs_begin_io */ 29118c2ecf20Sopenharmony_ci peer_req->flags |= EE_APPLICATION; 29128c2ecf20Sopenharmony_ci goto submit; 29138c2ecf20Sopenharmony_ci 29148c2ecf20Sopenharmony_ci case P_RS_THIN_REQ: 29158c2ecf20Sopenharmony_ci /* If at some point in the future we have a smart way to 29168c2ecf20Sopenharmony_ci find out if this data block is completely deallocated, 29178c2ecf20Sopenharmony_ci then we would do something smarter here than reading 29188c2ecf20Sopenharmony_ci the block... 
*/ 29198c2ecf20Sopenharmony_ci peer_req->flags |= EE_RS_THIN_REQ; 29208c2ecf20Sopenharmony_ci fallthrough; 29218c2ecf20Sopenharmony_ci case P_RS_DATA_REQUEST: 29228c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_end_rsdata_req; 29238c2ecf20Sopenharmony_ci fault_type = DRBD_FAULT_RS_RD; 29248c2ecf20Sopenharmony_ci /* used in the sector offset progress display */ 29258c2ecf20Sopenharmony_ci device->bm_resync_fo = BM_SECT_TO_BIT(sector); 29268c2ecf20Sopenharmony_ci break; 29278c2ecf20Sopenharmony_ci 29288c2ecf20Sopenharmony_ci case P_OV_REPLY: 29298c2ecf20Sopenharmony_ci case P_CSUM_RS_REQUEST: 29308c2ecf20Sopenharmony_ci fault_type = DRBD_FAULT_RS_RD; 29318c2ecf20Sopenharmony_ci di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); 29328c2ecf20Sopenharmony_ci if (!di) 29338c2ecf20Sopenharmony_ci goto out_free_e; 29348c2ecf20Sopenharmony_ci 29358c2ecf20Sopenharmony_ci di->digest_size = pi->size; 29368c2ecf20Sopenharmony_ci di->digest = (((char *)di)+sizeof(struct digest_info)); 29378c2ecf20Sopenharmony_ci 29388c2ecf20Sopenharmony_ci peer_req->digest = di; 29398c2ecf20Sopenharmony_ci peer_req->flags |= EE_HAS_DIGEST; 29408c2ecf20Sopenharmony_ci 29418c2ecf20Sopenharmony_ci if (drbd_recv_all(peer_device->connection, di->digest, pi->size)) 29428c2ecf20Sopenharmony_ci goto out_free_e; 29438c2ecf20Sopenharmony_ci 29448c2ecf20Sopenharmony_ci if (pi->cmd == P_CSUM_RS_REQUEST) { 29458c2ecf20Sopenharmony_ci D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 29468c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_end_csum_rs_req; 29478c2ecf20Sopenharmony_ci /* used in the sector offset progress display */ 29488c2ecf20Sopenharmony_ci device->bm_resync_fo = BM_SECT_TO_BIT(sector); 29498c2ecf20Sopenharmony_ci /* remember to report stats in drbd_resync_finished */ 29508c2ecf20Sopenharmony_ci device->use_csums = true; 29518c2ecf20Sopenharmony_ci } else if (pi->cmd == P_OV_REPLY) { 29528c2ecf20Sopenharmony_ci /* track progress, we may need to throttle */ 29538c2ecf20Sopenharmony_ci 
atomic_add(size >> 9, &device->rs_sect_in); 29548c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_end_ov_reply; 29558c2ecf20Sopenharmony_ci dec_rs_pending(device); 29568c2ecf20Sopenharmony_ci /* drbd_rs_begin_io done when we sent this request, 29578c2ecf20Sopenharmony_ci * but accounting still needs to be done. */ 29588c2ecf20Sopenharmony_ci goto submit_for_resync; 29598c2ecf20Sopenharmony_ci } 29608c2ecf20Sopenharmony_ci break; 29618c2ecf20Sopenharmony_ci 29628c2ecf20Sopenharmony_ci case P_OV_REQUEST: 29638c2ecf20Sopenharmony_ci if (device->ov_start_sector == ~(sector_t)0 && 29648c2ecf20Sopenharmony_ci peer_device->connection->agreed_pro_version >= 90) { 29658c2ecf20Sopenharmony_ci unsigned long now = jiffies; 29668c2ecf20Sopenharmony_ci int i; 29678c2ecf20Sopenharmony_ci device->ov_start_sector = sector; 29688c2ecf20Sopenharmony_ci device->ov_position = sector; 29698c2ecf20Sopenharmony_ci device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector); 29708c2ecf20Sopenharmony_ci device->rs_total = device->ov_left; 29718c2ecf20Sopenharmony_ci for (i = 0; i < DRBD_SYNC_MARKS; i++) { 29728c2ecf20Sopenharmony_ci device->rs_mark_left[i] = device->ov_left; 29738c2ecf20Sopenharmony_ci device->rs_mark_time[i] = now; 29748c2ecf20Sopenharmony_ci } 29758c2ecf20Sopenharmony_ci drbd_info(device, "Online Verify start sector: %llu\n", 29768c2ecf20Sopenharmony_ci (unsigned long long)sector); 29778c2ecf20Sopenharmony_ci } 29788c2ecf20Sopenharmony_ci peer_req->w.cb = w_e_end_ov_req; 29798c2ecf20Sopenharmony_ci fault_type = DRBD_FAULT_RS_RD; 29808c2ecf20Sopenharmony_ci break; 29818c2ecf20Sopenharmony_ci 29828c2ecf20Sopenharmony_ci default: 29838c2ecf20Sopenharmony_ci BUG(); 29848c2ecf20Sopenharmony_ci } 29858c2ecf20Sopenharmony_ci 29868c2ecf20Sopenharmony_ci /* Throttle, drbd_rs_begin_io and submit should become asynchronous 29878c2ecf20Sopenharmony_ci * wrt the receiver, but it is not as straightforward as it may seem. 
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below.
*/ 30138c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 30148c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->read_ee); 30158c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, drbd_rs_should_slow_down); 30188c2ecf20Sopenharmony_ci if (device->state.peer != R_PRIMARY 30198c2ecf20Sopenharmony_ci && drbd_rs_should_slow_down(device, sector, false)) 30208c2ecf20Sopenharmony_ci schedule_timeout_uninterruptible(HZ/10); 30218c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, drbd_rs_begin_io); 30228c2ecf20Sopenharmony_ci if (drbd_rs_begin_io(device, sector)) 30238c2ecf20Sopenharmony_ci goto out_free_e; 30248c2ecf20Sopenharmony_ci 30258c2ecf20Sopenharmony_cisubmit_for_resync: 30268c2ecf20Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_ev); 30278c2ecf20Sopenharmony_ci 30288c2ecf20Sopenharmony_cisubmit: 30298c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, drbd_submit_peer_request); 30308c2ecf20Sopenharmony_ci inc_unacked(device); 30318c2ecf20Sopenharmony_ci if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, 30328c2ecf20Sopenharmony_ci fault_type) == 0) 30338c2ecf20Sopenharmony_ci return 0; 30348c2ecf20Sopenharmony_ci 30358c2ecf20Sopenharmony_ci /* don't care for the reason here */ 30368c2ecf20Sopenharmony_ci drbd_err(device, "submit failed, triggering re-connect\n"); 30378c2ecf20Sopenharmony_ci 30388c2ecf20Sopenharmony_ciout_free_e: 30398c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 30408c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 30418c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 30428c2ecf20Sopenharmony_ci /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 30438c2ecf20Sopenharmony_ci 30448c2ecf20Sopenharmony_ci put_ldev(device); 30458c2ecf20Sopenharmony_ci drbd_free_peer_req(device, peer_req); 
30468c2ecf20Sopenharmony_ci return -EIO; 30478c2ecf20Sopenharmony_ci} 30488c2ecf20Sopenharmony_ci 30498c2ecf20Sopenharmony_ci/** 30508c2ecf20Sopenharmony_ci * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries 30518c2ecf20Sopenharmony_ci */ 30528c2ecf20Sopenharmony_cistatic int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local) 30538c2ecf20Sopenharmony_ci{ 30548c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 30558c2ecf20Sopenharmony_ci int self, peer, rv = -100; 30568c2ecf20Sopenharmony_ci unsigned long ch_self, ch_peer; 30578c2ecf20Sopenharmony_ci enum drbd_after_sb_p after_sb_0p; 30588c2ecf20Sopenharmony_ci 30598c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_BITMAP] & 1; 30608c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_BITMAP] & 1; 30618c2ecf20Sopenharmony_ci 30628c2ecf20Sopenharmony_ci ch_peer = device->p_uuid[UI_SIZE]; 30638c2ecf20Sopenharmony_ci ch_self = device->comm_bm_set; 30648c2ecf20Sopenharmony_ci 30658c2ecf20Sopenharmony_ci rcu_read_lock(); 30668c2ecf20Sopenharmony_ci after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p; 30678c2ecf20Sopenharmony_ci rcu_read_unlock(); 30688c2ecf20Sopenharmony_ci switch (after_sb_0p) { 30698c2ecf20Sopenharmony_ci case ASB_CONSENSUS: 30708c2ecf20Sopenharmony_ci case ASB_DISCARD_SECONDARY: 30718c2ecf20Sopenharmony_ci case ASB_CALL_HELPER: 30728c2ecf20Sopenharmony_ci case ASB_VIOLENTLY: 30738c2ecf20Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 30748c2ecf20Sopenharmony_ci break; 30758c2ecf20Sopenharmony_ci case ASB_DISCONNECT: 30768c2ecf20Sopenharmony_ci break; 30778c2ecf20Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 30788c2ecf20Sopenharmony_ci if (self == 0 && peer == 1) { 30798c2ecf20Sopenharmony_ci rv = -1; 30808c2ecf20Sopenharmony_ci break; 30818c2ecf20Sopenharmony_ci } 30828c2ecf20Sopenharmony_ci if (self == 1 && peer == 0) { 30838c2ecf20Sopenharmony_ci rv = 1; 30848c2ecf20Sopenharmony_ci break; 
30858c2ecf20Sopenharmony_ci } 30868c2ecf20Sopenharmony_ci fallthrough; /* to one of the other strategies */ 30878c2ecf20Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 30888c2ecf20Sopenharmony_ci if (self == 0 && peer == 1) { 30898c2ecf20Sopenharmony_ci rv = 1; 30908c2ecf20Sopenharmony_ci break; 30918c2ecf20Sopenharmony_ci } 30928c2ecf20Sopenharmony_ci if (self == 1 && peer == 0) { 30938c2ecf20Sopenharmony_ci rv = -1; 30948c2ecf20Sopenharmony_ci break; 30958c2ecf20Sopenharmony_ci } 30968c2ecf20Sopenharmony_ci /* Else fall through to one of the other strategies... */ 30978c2ecf20Sopenharmony_ci drbd_warn(device, "Discard younger/older primary did not find a decision\n" 30988c2ecf20Sopenharmony_ci "Using discard-least-changes instead\n"); 30998c2ecf20Sopenharmony_ci fallthrough; 31008c2ecf20Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 31018c2ecf20Sopenharmony_ci if (ch_peer == 0 && ch_self == 0) { 31028c2ecf20Sopenharmony_ci rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 31038c2ecf20Sopenharmony_ci ? -1 : 1; 31048c2ecf20Sopenharmony_ci break; 31058c2ecf20Sopenharmony_ci } else { 31068c2ecf20Sopenharmony_ci if (ch_peer == 0) { rv = 1; break; } 31078c2ecf20Sopenharmony_ci if (ch_self == 0) { rv = -1; break; } 31088c2ecf20Sopenharmony_ci } 31098c2ecf20Sopenharmony_ci if (after_sb_0p == ASB_DISCARD_ZERO_CHG) 31108c2ecf20Sopenharmony_ci break; 31118c2ecf20Sopenharmony_ci fallthrough; 31128c2ecf20Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 31138c2ecf20Sopenharmony_ci if (ch_self < ch_peer) 31148c2ecf20Sopenharmony_ci rv = -1; 31158c2ecf20Sopenharmony_ci else if (ch_self > ch_peer) 31168c2ecf20Sopenharmony_ci rv = 1; 31178c2ecf20Sopenharmony_ci else /* ( ch_self == ch_peer ) */ 31188c2ecf20Sopenharmony_ci /* Well, then use something else. */ 31198c2ecf20Sopenharmony_ci rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 31208c2ecf20Sopenharmony_ci ? 
-1 : 1; 31218c2ecf20Sopenharmony_ci break; 31228c2ecf20Sopenharmony_ci case ASB_DISCARD_LOCAL: 31238c2ecf20Sopenharmony_ci rv = -1; 31248c2ecf20Sopenharmony_ci break; 31258c2ecf20Sopenharmony_ci case ASB_DISCARD_REMOTE: 31268c2ecf20Sopenharmony_ci rv = 1; 31278c2ecf20Sopenharmony_ci } 31288c2ecf20Sopenharmony_ci 31298c2ecf20Sopenharmony_ci return rv; 31308c2ecf20Sopenharmony_ci} 31318c2ecf20Sopenharmony_ci 31328c2ecf20Sopenharmony_ci/** 31338c2ecf20Sopenharmony_ci * drbd_asb_recover_1p - Recover after split-brain with one remaining primary 31348c2ecf20Sopenharmony_ci */ 31358c2ecf20Sopenharmony_cistatic int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local) 31368c2ecf20Sopenharmony_ci{ 31378c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 31388c2ecf20Sopenharmony_ci int hg, rv = -100; 31398c2ecf20Sopenharmony_ci enum drbd_after_sb_p after_sb_1p; 31408c2ecf20Sopenharmony_ci 31418c2ecf20Sopenharmony_ci rcu_read_lock(); 31428c2ecf20Sopenharmony_ci after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p; 31438c2ecf20Sopenharmony_ci rcu_read_unlock(); 31448c2ecf20Sopenharmony_ci switch (after_sb_1p) { 31458c2ecf20Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 31468c2ecf20Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 31478c2ecf20Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 31488c2ecf20Sopenharmony_ci case ASB_DISCARD_LOCAL: 31498c2ecf20Sopenharmony_ci case ASB_DISCARD_REMOTE: 31508c2ecf20Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 31518c2ecf20Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 31528c2ecf20Sopenharmony_ci break; 31538c2ecf20Sopenharmony_ci case ASB_DISCONNECT: 31548c2ecf20Sopenharmony_ci break; 31558c2ecf20Sopenharmony_ci case ASB_CONSENSUS: 31568c2ecf20Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 31578c2ecf20Sopenharmony_ci if (hg == -1 && device->state.role == R_SECONDARY) 31588c2ecf20Sopenharmony_ci rv = hg; 31598c2ecf20Sopenharmony_ci if (hg == 1 && device->state.role 
== R_PRIMARY) 31608c2ecf20Sopenharmony_ci rv = hg; 31618c2ecf20Sopenharmony_ci break; 31628c2ecf20Sopenharmony_ci case ASB_VIOLENTLY: 31638c2ecf20Sopenharmony_ci rv = drbd_asb_recover_0p(peer_device); 31648c2ecf20Sopenharmony_ci break; 31658c2ecf20Sopenharmony_ci case ASB_DISCARD_SECONDARY: 31668c2ecf20Sopenharmony_ci return device->state.role == R_PRIMARY ? 1 : -1; 31678c2ecf20Sopenharmony_ci case ASB_CALL_HELPER: 31688c2ecf20Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 31698c2ecf20Sopenharmony_ci if (hg == -1 && device->state.role == R_PRIMARY) { 31708c2ecf20Sopenharmony_ci enum drbd_state_rv rv2; 31718c2ecf20Sopenharmony_ci 31728c2ecf20Sopenharmony_ci /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 31738c2ecf20Sopenharmony_ci * we might be here in C_WF_REPORT_PARAMS which is transient. 31748c2ecf20Sopenharmony_ci * we do not need to wait for the after state change work either. */ 31758c2ecf20Sopenharmony_ci rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 31768c2ecf20Sopenharmony_ci if (rv2 != SS_SUCCESS) { 31778c2ecf20Sopenharmony_ci drbd_khelper(device, "pri-lost-after-sb"); 31788c2ecf20Sopenharmony_ci } else { 31798c2ecf20Sopenharmony_ci drbd_warn(device, "Successfully gave up primary role.\n"); 31808c2ecf20Sopenharmony_ci rv = hg; 31818c2ecf20Sopenharmony_ci } 31828c2ecf20Sopenharmony_ci } else 31838c2ecf20Sopenharmony_ci rv = hg; 31848c2ecf20Sopenharmony_ci } 31858c2ecf20Sopenharmony_ci 31868c2ecf20Sopenharmony_ci return rv; 31878c2ecf20Sopenharmony_ci} 31888c2ecf20Sopenharmony_ci 31898c2ecf20Sopenharmony_ci/** 31908c2ecf20Sopenharmony_ci * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries 31918c2ecf20Sopenharmony_ci */ 31928c2ecf20Sopenharmony_cistatic int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local) 31938c2ecf20Sopenharmony_ci{ 31948c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 31958c2ecf20Sopenharmony_ci int hg, rv = 
-100; 31968c2ecf20Sopenharmony_ci enum drbd_after_sb_p after_sb_2p; 31978c2ecf20Sopenharmony_ci 31988c2ecf20Sopenharmony_ci rcu_read_lock(); 31998c2ecf20Sopenharmony_ci after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p; 32008c2ecf20Sopenharmony_ci rcu_read_unlock(); 32018c2ecf20Sopenharmony_ci switch (after_sb_2p) { 32028c2ecf20Sopenharmony_ci case ASB_DISCARD_YOUNGER_PRI: 32038c2ecf20Sopenharmony_ci case ASB_DISCARD_OLDER_PRI: 32048c2ecf20Sopenharmony_ci case ASB_DISCARD_LEAST_CHG: 32058c2ecf20Sopenharmony_ci case ASB_DISCARD_LOCAL: 32068c2ecf20Sopenharmony_ci case ASB_DISCARD_REMOTE: 32078c2ecf20Sopenharmony_ci case ASB_CONSENSUS: 32088c2ecf20Sopenharmony_ci case ASB_DISCARD_SECONDARY: 32098c2ecf20Sopenharmony_ci case ASB_DISCARD_ZERO_CHG: 32108c2ecf20Sopenharmony_ci drbd_err(device, "Configuration error.\n"); 32118c2ecf20Sopenharmony_ci break; 32128c2ecf20Sopenharmony_ci case ASB_VIOLENTLY: 32138c2ecf20Sopenharmony_ci rv = drbd_asb_recover_0p(peer_device); 32148c2ecf20Sopenharmony_ci break; 32158c2ecf20Sopenharmony_ci case ASB_DISCONNECT: 32168c2ecf20Sopenharmony_ci break; 32178c2ecf20Sopenharmony_ci case ASB_CALL_HELPER: 32188c2ecf20Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 32198c2ecf20Sopenharmony_ci if (hg == -1) { 32208c2ecf20Sopenharmony_ci enum drbd_state_rv rv2; 32218c2ecf20Sopenharmony_ci 32228c2ecf20Sopenharmony_ci /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 32238c2ecf20Sopenharmony_ci * we might be here in C_WF_REPORT_PARAMS which is transient. 32248c2ecf20Sopenharmony_ci * we do not need to wait for the after state change work either. 
*/ 32258c2ecf20Sopenharmony_ci rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 32268c2ecf20Sopenharmony_ci if (rv2 != SS_SUCCESS) { 32278c2ecf20Sopenharmony_ci drbd_khelper(device, "pri-lost-after-sb"); 32288c2ecf20Sopenharmony_ci } else { 32298c2ecf20Sopenharmony_ci drbd_warn(device, "Successfully gave up primary role.\n"); 32308c2ecf20Sopenharmony_ci rv = hg; 32318c2ecf20Sopenharmony_ci } 32328c2ecf20Sopenharmony_ci } else 32338c2ecf20Sopenharmony_ci rv = hg; 32348c2ecf20Sopenharmony_ci } 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ci return rv; 32378c2ecf20Sopenharmony_ci} 32388c2ecf20Sopenharmony_ci 32398c2ecf20Sopenharmony_cistatic void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, 32408c2ecf20Sopenharmony_ci u64 bits, u64 flags) 32418c2ecf20Sopenharmony_ci{ 32428c2ecf20Sopenharmony_ci if (!uuid) { 32438c2ecf20Sopenharmony_ci drbd_info(device, "%s uuid info vanished while I was looking!\n", text); 32448c2ecf20Sopenharmony_ci return; 32458c2ecf20Sopenharmony_ci } 32468c2ecf20Sopenharmony_ci drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", 32478c2ecf20Sopenharmony_ci text, 32488c2ecf20Sopenharmony_ci (unsigned long long)uuid[UI_CURRENT], 32498c2ecf20Sopenharmony_ci (unsigned long long)uuid[UI_BITMAP], 32508c2ecf20Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_START], 32518c2ecf20Sopenharmony_ci (unsigned long long)uuid[UI_HISTORY_END], 32528c2ecf20Sopenharmony_ci (unsigned long long)bits, 32538c2ecf20Sopenharmony_ci (unsigned long long)flags); 32548c2ecf20Sopenharmony_ci} 32558c2ecf20Sopenharmony_ci 32568c2ecf20Sopenharmony_ci/* 32578c2ecf20Sopenharmony_ci 100 after split brain try auto recover 32588c2ecf20Sopenharmony_ci 2 C_SYNC_SOURCE set BitMap 32598c2ecf20Sopenharmony_ci 1 C_SYNC_SOURCE use BitMap 32608c2ecf20Sopenharmony_ci 0 no Sync 32618c2ecf20Sopenharmony_ci -1 C_SYNC_TARGET use BitMap 32628c2ecf20Sopenharmony_ci -2 C_SYNC_TARGET set BitMap 32638c2ecf20Sopenharmony_ci 
-100 after split brain, disconnect 32648c2ecf20Sopenharmony_ci-1000 unrelated data 32658c2ecf20Sopenharmony_ci-1091 requires proto 91 32668c2ecf20Sopenharmony_ci-1096 requires proto 96 32678c2ecf20Sopenharmony_ci */ 32688c2ecf20Sopenharmony_ci 32698c2ecf20Sopenharmony_cistatic int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) 32708c2ecf20Sopenharmony_ci{ 32718c2ecf20Sopenharmony_ci struct drbd_peer_device *const peer_device = first_peer_device(device); 32728c2ecf20Sopenharmony_ci struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; 32738c2ecf20Sopenharmony_ci u64 self, peer; 32748c2ecf20Sopenharmony_ci int i, j; 32758c2ecf20Sopenharmony_ci 32768c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); 32778c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 32788c2ecf20Sopenharmony_ci 32798c2ecf20Sopenharmony_ci *rule_nr = 10; 32808c2ecf20Sopenharmony_ci if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) 32818c2ecf20Sopenharmony_ci return 0; 32828c2ecf20Sopenharmony_ci 32838c2ecf20Sopenharmony_ci *rule_nr = 20; 32848c2ecf20Sopenharmony_ci if ((self == UUID_JUST_CREATED || self == (u64)0) && 32858c2ecf20Sopenharmony_ci peer != UUID_JUST_CREATED) 32868c2ecf20Sopenharmony_ci return -2; 32878c2ecf20Sopenharmony_ci 32888c2ecf20Sopenharmony_ci *rule_nr = 30; 32898c2ecf20Sopenharmony_ci if (self != UUID_JUST_CREATED && 32908c2ecf20Sopenharmony_ci (peer == UUID_JUST_CREATED || peer == (u64)0)) 32918c2ecf20Sopenharmony_ci return 2; 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci if (self == peer) { 32948c2ecf20Sopenharmony_ci int rct, dc; /* roles at crash time */ 32958c2ecf20Sopenharmony_ci 32968c2ecf20Sopenharmony_ci if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { 32978c2ecf20Sopenharmony_ci 32988c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 91) 
32998c2ecf20Sopenharmony_ci return -1091; 33008c2ecf20Sopenharmony_ci 33018c2ecf20Sopenharmony_ci if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && 33028c2ecf20Sopenharmony_ci (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { 33038c2ecf20Sopenharmony_ci drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n"); 33048c2ecf20Sopenharmony_ci drbd_uuid_move_history(device); 33058c2ecf20Sopenharmony_ci device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; 33068c2ecf20Sopenharmony_ci device->ldev->md.uuid[UI_BITMAP] = 0; 33078c2ecf20Sopenharmony_ci 33088c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "self", device->ldev->md.uuid, 33098c2ecf20Sopenharmony_ci device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0); 33108c2ecf20Sopenharmony_ci *rule_nr = 34; 33118c2ecf20Sopenharmony_ci } else { 33128c2ecf20Sopenharmony_ci drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n"); 33138c2ecf20Sopenharmony_ci *rule_nr = 36; 33148c2ecf20Sopenharmony_ci } 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_ci return 1; 33178c2ecf20Sopenharmony_ci } 33188c2ecf20Sopenharmony_ci 33198c2ecf20Sopenharmony_ci if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { 33208c2ecf20Sopenharmony_ci 33218c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 91) 33228c2ecf20Sopenharmony_ci return -1091; 33238c2ecf20Sopenharmony_ci 33248c2ecf20Sopenharmony_ci if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && 33258c2ecf20Sopenharmony_ci (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) { 33268c2ecf20Sopenharmony_ci drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n"); 33278c2ecf20Sopenharmony_ci 
33288c2ecf20Sopenharmony_ci device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START]; 33298c2ecf20Sopenharmony_ci device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP]; 33308c2ecf20Sopenharmony_ci device->p_uuid[UI_BITMAP] = 0UL; 33318c2ecf20Sopenharmony_ci 33328c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 33338c2ecf20Sopenharmony_ci *rule_nr = 35; 33348c2ecf20Sopenharmony_ci } else { 33358c2ecf20Sopenharmony_ci drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n"); 33368c2ecf20Sopenharmony_ci *rule_nr = 37; 33378c2ecf20Sopenharmony_ci } 33388c2ecf20Sopenharmony_ci 33398c2ecf20Sopenharmony_ci return -1; 33408c2ecf20Sopenharmony_ci } 33418c2ecf20Sopenharmony_ci 33428c2ecf20Sopenharmony_ci /* Common power [off|failure] */ 33438c2ecf20Sopenharmony_ci rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) + 33448c2ecf20Sopenharmony_ci (device->p_uuid[UI_FLAGS] & 2); 33458c2ecf20Sopenharmony_ci /* lowest bit is set when we were primary, 33468c2ecf20Sopenharmony_ci * next bit (weight 2) is set when peer was primary */ 33478c2ecf20Sopenharmony_ci *rule_nr = 40; 33488c2ecf20Sopenharmony_ci 33498c2ecf20Sopenharmony_ci /* Neither has the "crashed primary" flag set, 33508c2ecf20Sopenharmony_ci * only a replication link hickup. */ 33518c2ecf20Sopenharmony_ci if (rct == 0) 33528c2ecf20Sopenharmony_ci return 0; 33538c2ecf20Sopenharmony_ci 33548c2ecf20Sopenharmony_ci /* Current UUID equal and no bitmap uuid; does not necessarily 33558c2ecf20Sopenharmony_ci * mean this was a "simultaneous hard crash", maybe IO was 33568c2ecf20Sopenharmony_ci * frozen, so no UUID-bump happened. 33578c2ecf20Sopenharmony_ci * This is a protocol change, overload DRBD_FF_WSAME as flag 33588c2ecf20Sopenharmony_ci * for "new-enough" peer DRBD version. 
*/ 33598c2ecf20Sopenharmony_ci if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) { 33608c2ecf20Sopenharmony_ci *rule_nr = 41; 33618c2ecf20Sopenharmony_ci if (!(connection->agreed_features & DRBD_FF_WSAME)) { 33628c2ecf20Sopenharmony_ci drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n"); 33638c2ecf20Sopenharmony_ci return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8)); 33648c2ecf20Sopenharmony_ci } 33658c2ecf20Sopenharmony_ci if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) { 33668c2ecf20Sopenharmony_ci /* At least one has the "crashed primary" bit set, 33678c2ecf20Sopenharmony_ci * both are primary now, but neither has rotated its UUIDs? 33688c2ecf20Sopenharmony_ci * "Can not happen." */ 33698c2ecf20Sopenharmony_ci drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n"); 33708c2ecf20Sopenharmony_ci return -100; 33718c2ecf20Sopenharmony_ci } 33728c2ecf20Sopenharmony_ci if (device->state.role == R_PRIMARY) 33738c2ecf20Sopenharmony_ci return 1; 33748c2ecf20Sopenharmony_ci return -1; 33758c2ecf20Sopenharmony_ci } 33768c2ecf20Sopenharmony_ci 33778c2ecf20Sopenharmony_ci /* Both are secondary. 33788c2ecf20Sopenharmony_ci * Really looks like recovery from simultaneous hard crash. 33798c2ecf20Sopenharmony_ci * Check which had been primary before, and arbitrate. */ 33808c2ecf20Sopenharmony_ci switch (rct) { 33818c2ecf20Sopenharmony_ci case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */ 33828c2ecf20Sopenharmony_ci case 1: /* self_pri && !peer_pri */ return 1; 33838c2ecf20Sopenharmony_ci case 2: /* !self_pri && peer_pri */ return -1; 33848c2ecf20Sopenharmony_ci case 3: /* self_pri && peer_pri */ 33858c2ecf20Sopenharmony_ci dc = test_bit(RESOLVE_CONFLICTS, &connection->flags); 33868c2ecf20Sopenharmony_ci return dc ? 
-1 : 1; 33878c2ecf20Sopenharmony_ci } 33888c2ecf20Sopenharmony_ci } 33898c2ecf20Sopenharmony_ci 33908c2ecf20Sopenharmony_ci *rule_nr = 50; 33918c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_BITMAP] & ~((u64)1); 33928c2ecf20Sopenharmony_ci if (self == peer) 33938c2ecf20Sopenharmony_ci return -1; 33948c2ecf20Sopenharmony_ci 33958c2ecf20Sopenharmony_ci *rule_nr = 51; 33968c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); 33978c2ecf20Sopenharmony_ci if (self == peer) { 33988c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 96 ? 33998c2ecf20Sopenharmony_ci (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == 34008c2ecf20Sopenharmony_ci (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : 34018c2ecf20Sopenharmony_ci peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { 34028c2ecf20Sopenharmony_ci /* The last P_SYNC_UUID did not get though. Undo the last start of 34038c2ecf20Sopenharmony_ci resync as sync source modifications of the peer's UUIDs. 
*/ 34048c2ecf20Sopenharmony_ci 34058c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 91) 34068c2ecf20Sopenharmony_ci return -1091; 34078c2ecf20Sopenharmony_ci 34088c2ecf20Sopenharmony_ci device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; 34098c2ecf20Sopenharmony_ci device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1]; 34108c2ecf20Sopenharmony_ci 34118c2ecf20Sopenharmony_ci drbd_info(device, "Lost last syncUUID packet, corrected:\n"); 34128c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 34138c2ecf20Sopenharmony_ci 34148c2ecf20Sopenharmony_ci return -1; 34158c2ecf20Sopenharmony_ci } 34168c2ecf20Sopenharmony_ci } 34178c2ecf20Sopenharmony_ci 34188c2ecf20Sopenharmony_ci *rule_nr = 60; 34198c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); 34208c2ecf20Sopenharmony_ci for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 34218c2ecf20Sopenharmony_ci peer = device->p_uuid[i] & ~((u64)1); 34228c2ecf20Sopenharmony_ci if (self == peer) 34238c2ecf20Sopenharmony_ci return -2; 34248c2ecf20Sopenharmony_ci } 34258c2ecf20Sopenharmony_ci 34268c2ecf20Sopenharmony_ci *rule_nr = 70; 34278c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); 34288c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 34298c2ecf20Sopenharmony_ci if (self == peer) 34308c2ecf20Sopenharmony_ci return 1; 34318c2ecf20Sopenharmony_ci 34328c2ecf20Sopenharmony_ci *rule_nr = 71; 34338c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 34348c2ecf20Sopenharmony_ci if (self == peer) { 34358c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 96 ? 
34368c2ecf20Sopenharmony_ci (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == 34378c2ecf20Sopenharmony_ci (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : 34388c2ecf20Sopenharmony_ci self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { 34398c2ecf20Sopenharmony_ci /* The last P_SYNC_UUID did not get though. Undo the last start of 34408c2ecf20Sopenharmony_ci resync as sync source modifications of our UUIDs. */ 34418c2ecf20Sopenharmony_ci 34428c2ecf20Sopenharmony_ci if (connection->agreed_pro_version < 91) 34438c2ecf20Sopenharmony_ci return -1091; 34448c2ecf20Sopenharmony_ci 34458c2ecf20Sopenharmony_ci __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); 34468c2ecf20Sopenharmony_ci __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]); 34478c2ecf20Sopenharmony_ci 34488c2ecf20Sopenharmony_ci drbd_info(device, "Last syncUUID did not get through, corrected:\n"); 34498c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "self", device->ldev->md.uuid, 34508c2ecf20Sopenharmony_ci device->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(device) : 0, 0); 34518c2ecf20Sopenharmony_ci 34528c2ecf20Sopenharmony_ci return 1; 34538c2ecf20Sopenharmony_ci } 34548c2ecf20Sopenharmony_ci } 34558c2ecf20Sopenharmony_ci 34568c2ecf20Sopenharmony_ci 34578c2ecf20Sopenharmony_ci *rule_nr = 80; 34588c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 34598c2ecf20Sopenharmony_ci for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 34608c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[i] & ~((u64)1); 34618c2ecf20Sopenharmony_ci if (self == peer) 34628c2ecf20Sopenharmony_ci return 2; 34638c2ecf20Sopenharmony_ci } 34648c2ecf20Sopenharmony_ci 34658c2ecf20Sopenharmony_ci *rule_nr = 90; 34668c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); 34678c2ecf20Sopenharmony_ci peer = device->p_uuid[UI_BITMAP] & ~((u64)1); 34688c2ecf20Sopenharmony_ci if (self == peer && self != ((u64)0)) 34698c2ecf20Sopenharmony_ci return 100; 34708c2ecf20Sopenharmony_ci 34718c2ecf20Sopenharmony_ci *rule_nr = 100; 34728c2ecf20Sopenharmony_ci for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 34738c2ecf20Sopenharmony_ci self = device->ldev->md.uuid[i] & ~((u64)1); 34748c2ecf20Sopenharmony_ci for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { 34758c2ecf20Sopenharmony_ci peer = device->p_uuid[j] & ~((u64)1); 34768c2ecf20Sopenharmony_ci if (self == peer) 34778c2ecf20Sopenharmony_ci return -100; 34788c2ecf20Sopenharmony_ci } 34798c2ecf20Sopenharmony_ci } 34808c2ecf20Sopenharmony_ci 34818c2ecf20Sopenharmony_ci return -1000; 34828c2ecf20Sopenharmony_ci} 34838c2ecf20Sopenharmony_ci 34848c2ecf20Sopenharmony_ci/* drbd_sync_handshake() returns the new conn state on success, or 34858c2ecf20Sopenharmony_ci CONN_MASK (-1) on failure. 
34868c2ecf20Sopenharmony_ci */ 34878c2ecf20Sopenharmony_cistatic enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, 34888c2ecf20Sopenharmony_ci enum drbd_role peer_role, 34898c2ecf20Sopenharmony_ci enum drbd_disk_state peer_disk) __must_hold(local) 34908c2ecf20Sopenharmony_ci{ 34918c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 34928c2ecf20Sopenharmony_ci enum drbd_conns rv = C_MASK; 34938c2ecf20Sopenharmony_ci enum drbd_disk_state mydisk; 34948c2ecf20Sopenharmony_ci struct net_conf *nc; 34958c2ecf20Sopenharmony_ci int hg, rule_nr, rr_conflict, tentative, always_asbp; 34968c2ecf20Sopenharmony_ci 34978c2ecf20Sopenharmony_ci mydisk = device->state.disk; 34988c2ecf20Sopenharmony_ci if (mydisk == D_NEGOTIATING) 34998c2ecf20Sopenharmony_ci mydisk = device->new_state_tmp.disk; 35008c2ecf20Sopenharmony_ci 35018c2ecf20Sopenharmony_ci drbd_info(device, "drbd_sync_handshake:\n"); 35028c2ecf20Sopenharmony_ci 35038c2ecf20Sopenharmony_ci spin_lock_irq(&device->ldev->md.uuid_lock); 35048c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0); 35058c2ecf20Sopenharmony_ci drbd_uuid_dump(device, "peer", device->p_uuid, 35068c2ecf20Sopenharmony_ci device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 35078c2ecf20Sopenharmony_ci 35088c2ecf20Sopenharmony_ci hg = drbd_uuid_compare(device, peer_role, &rule_nr); 35098c2ecf20Sopenharmony_ci spin_unlock_irq(&device->ldev->md.uuid_lock); 35108c2ecf20Sopenharmony_ci 35118c2ecf20Sopenharmony_ci drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); 35128c2ecf20Sopenharmony_ci 35138c2ecf20Sopenharmony_ci if (hg == -1000) { 35148c2ecf20Sopenharmony_ci drbd_alert(device, "Unrelated data, aborting!\n"); 35158c2ecf20Sopenharmony_ci return C_MASK; 35168c2ecf20Sopenharmony_ci } 35178c2ecf20Sopenharmony_ci if (hg < -0x10000) { 35188c2ecf20Sopenharmony_ci int proto, fflags; 35198c2ecf20Sopenharmony_ci hg = -hg; 35208c2ecf20Sopenharmony_ci proto = hg 
& 0xff; 35218c2ecf20Sopenharmony_ci fflags = (hg >> 8) & 0xff; 35228c2ecf20Sopenharmony_ci drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n", 35238c2ecf20Sopenharmony_ci proto, fflags); 35248c2ecf20Sopenharmony_ci return C_MASK; 35258c2ecf20Sopenharmony_ci } 35268c2ecf20Sopenharmony_ci if (hg < -1000) { 35278c2ecf20Sopenharmony_ci drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); 35288c2ecf20Sopenharmony_ci return C_MASK; 35298c2ecf20Sopenharmony_ci } 35308c2ecf20Sopenharmony_ci 35318c2ecf20Sopenharmony_ci if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) || 35328c2ecf20Sopenharmony_ci (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) { 35338c2ecf20Sopenharmony_ci int f = (hg == -100) || abs(hg) == 2; 35348c2ecf20Sopenharmony_ci hg = mydisk > D_INCONSISTENT ? 1 : -1; 35358c2ecf20Sopenharmony_ci if (f) 35368c2ecf20Sopenharmony_ci hg = hg*2; 35378c2ecf20Sopenharmony_ci drbd_info(device, "Becoming sync %s due to disk states.\n", 35388c2ecf20Sopenharmony_ci hg > 0 ? 
"source" : "target"); 35398c2ecf20Sopenharmony_ci } 35408c2ecf20Sopenharmony_ci 35418c2ecf20Sopenharmony_ci if (abs(hg) == 100) 35428c2ecf20Sopenharmony_ci drbd_khelper(device, "initial-split-brain"); 35438c2ecf20Sopenharmony_ci 35448c2ecf20Sopenharmony_ci rcu_read_lock(); 35458c2ecf20Sopenharmony_ci nc = rcu_dereference(peer_device->connection->net_conf); 35468c2ecf20Sopenharmony_ci always_asbp = nc->always_asbp; 35478c2ecf20Sopenharmony_ci rr_conflict = nc->rr_conflict; 35488c2ecf20Sopenharmony_ci tentative = nc->tentative; 35498c2ecf20Sopenharmony_ci rcu_read_unlock(); 35508c2ecf20Sopenharmony_ci 35518c2ecf20Sopenharmony_ci if (hg == 100 || (hg == -100 && always_asbp)) { 35528c2ecf20Sopenharmony_ci int pcount = (device->state.role == R_PRIMARY) 35538c2ecf20Sopenharmony_ci + (peer_role == R_PRIMARY); 35548c2ecf20Sopenharmony_ci int forced = (hg == -100); 35558c2ecf20Sopenharmony_ci 35568c2ecf20Sopenharmony_ci switch (pcount) { 35578c2ecf20Sopenharmony_ci case 0: 35588c2ecf20Sopenharmony_ci hg = drbd_asb_recover_0p(peer_device); 35598c2ecf20Sopenharmony_ci break; 35608c2ecf20Sopenharmony_ci case 1: 35618c2ecf20Sopenharmony_ci hg = drbd_asb_recover_1p(peer_device); 35628c2ecf20Sopenharmony_ci break; 35638c2ecf20Sopenharmony_ci case 2: 35648c2ecf20Sopenharmony_ci hg = drbd_asb_recover_2p(peer_device); 35658c2ecf20Sopenharmony_ci break; 35668c2ecf20Sopenharmony_ci } 35678c2ecf20Sopenharmony_ci if (abs(hg) < 100) { 35688c2ecf20Sopenharmony_ci drbd_warn(device, "Split-Brain detected, %d primaries, " 35698c2ecf20Sopenharmony_ci "automatically solved. Sync from %s node\n", 35708c2ecf20Sopenharmony_ci pcount, (hg < 0) ? 
"peer" : "this"); 35718c2ecf20Sopenharmony_ci if (forced) { 35728c2ecf20Sopenharmony_ci drbd_warn(device, "Doing a full sync, since" 35738c2ecf20Sopenharmony_ci " UUIDs where ambiguous.\n"); 35748c2ecf20Sopenharmony_ci hg = hg*2; 35758c2ecf20Sopenharmony_ci } 35768c2ecf20Sopenharmony_ci } 35778c2ecf20Sopenharmony_ci } 35788c2ecf20Sopenharmony_ci 35798c2ecf20Sopenharmony_ci if (hg == -100) { 35808c2ecf20Sopenharmony_ci if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1)) 35818c2ecf20Sopenharmony_ci hg = -1; 35828c2ecf20Sopenharmony_ci if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1)) 35838c2ecf20Sopenharmony_ci hg = 1; 35848c2ecf20Sopenharmony_ci 35858c2ecf20Sopenharmony_ci if (abs(hg) < 100) 35868c2ecf20Sopenharmony_ci drbd_warn(device, "Split-Brain detected, manually solved. " 35878c2ecf20Sopenharmony_ci "Sync from %s node\n", 35888c2ecf20Sopenharmony_ci (hg < 0) ? "peer" : "this"); 35898c2ecf20Sopenharmony_ci } 35908c2ecf20Sopenharmony_ci 35918c2ecf20Sopenharmony_ci if (hg == -100) { 35928c2ecf20Sopenharmony_ci /* FIXME this log message is not correct if we end up here 35938c2ecf20Sopenharmony_ci * after an attempted attach on a diskless node. 35948c2ecf20Sopenharmony_ci * We just refuse to attach -- well, we drop the "connection" 35958c2ecf20Sopenharmony_ci * to that disk, in a way... 
*/ 35968c2ecf20Sopenharmony_ci drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n"); 35978c2ecf20Sopenharmony_ci drbd_khelper(device, "split-brain"); 35988c2ecf20Sopenharmony_ci return C_MASK; 35998c2ecf20Sopenharmony_ci } 36008c2ecf20Sopenharmony_ci 36018c2ecf20Sopenharmony_ci if (hg > 0 && mydisk <= D_INCONSISTENT) { 36028c2ecf20Sopenharmony_ci drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n"); 36038c2ecf20Sopenharmony_ci return C_MASK; 36048c2ecf20Sopenharmony_ci } 36058c2ecf20Sopenharmony_ci 36068c2ecf20Sopenharmony_ci if (hg < 0 && /* by intention we do not use mydisk here. */ 36078c2ecf20Sopenharmony_ci device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) { 36088c2ecf20Sopenharmony_ci switch (rr_conflict) { 36098c2ecf20Sopenharmony_ci case ASB_CALL_HELPER: 36108c2ecf20Sopenharmony_ci drbd_khelper(device, "pri-lost"); 36118c2ecf20Sopenharmony_ci fallthrough; 36128c2ecf20Sopenharmony_ci case ASB_DISCONNECT: 36138c2ecf20Sopenharmony_ci drbd_err(device, "I shall become SyncTarget, but I am primary!\n"); 36148c2ecf20Sopenharmony_ci return C_MASK; 36158c2ecf20Sopenharmony_ci case ASB_VIOLENTLY: 36168c2ecf20Sopenharmony_ci drbd_warn(device, "Becoming SyncTarget, violating the stable-data" 36178c2ecf20Sopenharmony_ci "assumption\n"); 36188c2ecf20Sopenharmony_ci } 36198c2ecf20Sopenharmony_ci } 36208c2ecf20Sopenharmony_ci 36218c2ecf20Sopenharmony_ci if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) { 36228c2ecf20Sopenharmony_ci if (hg == 0) 36238c2ecf20Sopenharmony_ci drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n"); 36248c2ecf20Sopenharmony_ci else 36258c2ecf20Sopenharmony_ci drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.", 36268c2ecf20Sopenharmony_ci drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), 36278c2ecf20Sopenharmony_ci abs(hg) >= 2 ? 
"full" : "bit-map based"); 36288c2ecf20Sopenharmony_ci return C_MASK; 36298c2ecf20Sopenharmony_ci } 36308c2ecf20Sopenharmony_ci 36318c2ecf20Sopenharmony_ci if (abs(hg) >= 2) { 36328c2ecf20Sopenharmony_ci drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 36338c2ecf20Sopenharmony_ci if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", 36348c2ecf20Sopenharmony_ci BM_LOCKED_SET_ALLOWED)) 36358c2ecf20Sopenharmony_ci return C_MASK; 36368c2ecf20Sopenharmony_ci } 36378c2ecf20Sopenharmony_ci 36388c2ecf20Sopenharmony_ci if (hg > 0) { /* become sync source. */ 36398c2ecf20Sopenharmony_ci rv = C_WF_BITMAP_S; 36408c2ecf20Sopenharmony_ci } else if (hg < 0) { /* become sync target */ 36418c2ecf20Sopenharmony_ci rv = C_WF_BITMAP_T; 36428c2ecf20Sopenharmony_ci } else { 36438c2ecf20Sopenharmony_ci rv = C_CONNECTED; 36448c2ecf20Sopenharmony_ci if (drbd_bm_total_weight(device)) { 36458c2ecf20Sopenharmony_ci drbd_info(device, "No resync, but %lu bits in bitmap!\n", 36468c2ecf20Sopenharmony_ci drbd_bm_total_weight(device)); 36478c2ecf20Sopenharmony_ci } 36488c2ecf20Sopenharmony_ci } 36498c2ecf20Sopenharmony_ci 36508c2ecf20Sopenharmony_ci return rv; 36518c2ecf20Sopenharmony_ci} 36528c2ecf20Sopenharmony_ci 36538c2ecf20Sopenharmony_cistatic enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) 36548c2ecf20Sopenharmony_ci{ 36558c2ecf20Sopenharmony_ci /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ 36568c2ecf20Sopenharmony_ci if (peer == ASB_DISCARD_REMOTE) 36578c2ecf20Sopenharmony_ci return ASB_DISCARD_LOCAL; 36588c2ecf20Sopenharmony_ci 36598c2ecf20Sopenharmony_ci /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ 36608c2ecf20Sopenharmony_ci if (peer == ASB_DISCARD_LOCAL) 36618c2ecf20Sopenharmony_ci return ASB_DISCARD_REMOTE; 36628c2ecf20Sopenharmony_ci 36638c2ecf20Sopenharmony_ci /* everything else is valid if they are equal on both sides. 
*/ 36648c2ecf20Sopenharmony_ci return peer; 36658c2ecf20Sopenharmony_ci} 36668c2ecf20Sopenharmony_ci 36678c2ecf20Sopenharmony_cistatic int receive_protocol(struct drbd_connection *connection, struct packet_info *pi) 36688c2ecf20Sopenharmony_ci{ 36698c2ecf20Sopenharmony_ci struct p_protocol *p = pi->data; 36708c2ecf20Sopenharmony_ci enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 36718c2ecf20Sopenharmony_ci int p_proto, p_discard_my_data, p_two_primaries, cf; 36728c2ecf20Sopenharmony_ci struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; 36738c2ecf20Sopenharmony_ci char integrity_alg[SHARED_SECRET_MAX] = ""; 36748c2ecf20Sopenharmony_ci struct crypto_shash *peer_integrity_tfm = NULL; 36758c2ecf20Sopenharmony_ci void *int_dig_in = NULL, *int_dig_vv = NULL; 36768c2ecf20Sopenharmony_ci 36778c2ecf20Sopenharmony_ci p_proto = be32_to_cpu(p->protocol); 36788c2ecf20Sopenharmony_ci p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 36798c2ecf20Sopenharmony_ci p_after_sb_1p = be32_to_cpu(p->after_sb_1p); 36808c2ecf20Sopenharmony_ci p_after_sb_2p = be32_to_cpu(p->after_sb_2p); 36818c2ecf20Sopenharmony_ci p_two_primaries = be32_to_cpu(p->two_primaries); 36828c2ecf20Sopenharmony_ci cf = be32_to_cpu(p->conn_flags); 36838c2ecf20Sopenharmony_ci p_discard_my_data = cf & CF_DISCARD_MY_DATA; 36848c2ecf20Sopenharmony_ci 36858c2ecf20Sopenharmony_ci if (connection->agreed_pro_version >= 87) { 36868c2ecf20Sopenharmony_ci int err; 36878c2ecf20Sopenharmony_ci 36888c2ecf20Sopenharmony_ci if (pi->size > sizeof(integrity_alg)) 36898c2ecf20Sopenharmony_ci return -EIO; 36908c2ecf20Sopenharmony_ci err = drbd_recv_all(connection, integrity_alg, pi->size); 36918c2ecf20Sopenharmony_ci if (err) 36928c2ecf20Sopenharmony_ci return err; 36938c2ecf20Sopenharmony_ci integrity_alg[SHARED_SECRET_MAX - 1] = 0; 36948c2ecf20Sopenharmony_ci } 36958c2ecf20Sopenharmony_ci 36968c2ecf20Sopenharmony_ci if (pi->cmd != P_PROTOCOL_UPDATE) { 36978c2ecf20Sopenharmony_ci clear_bit(CONN_DRY_RUN, 
&connection->flags); 36988c2ecf20Sopenharmony_ci 36998c2ecf20Sopenharmony_ci if (cf & CF_DRY_RUN) 37008c2ecf20Sopenharmony_ci set_bit(CONN_DRY_RUN, &connection->flags); 37018c2ecf20Sopenharmony_ci 37028c2ecf20Sopenharmony_ci rcu_read_lock(); 37038c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 37048c2ecf20Sopenharmony_ci 37058c2ecf20Sopenharmony_ci if (p_proto != nc->wire_protocol) { 37068c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "protocol"); 37078c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37088c2ecf20Sopenharmony_ci } 37098c2ecf20Sopenharmony_ci 37108c2ecf20Sopenharmony_ci if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { 37118c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri"); 37128c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37138c2ecf20Sopenharmony_ci } 37148c2ecf20Sopenharmony_ci 37158c2ecf20Sopenharmony_ci if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { 37168c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri"); 37178c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37188c2ecf20Sopenharmony_ci } 37198c2ecf20Sopenharmony_ci 37208c2ecf20Sopenharmony_ci if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { 37218c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri"); 37228c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37238c2ecf20Sopenharmony_ci } 37248c2ecf20Sopenharmony_ci 37258c2ecf20Sopenharmony_ci if (p_discard_my_data && nc->discard_my_data) { 37268c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "discard-my-data"); 37278c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37288c2ecf20Sopenharmony_ci } 37298c2ecf20Sopenharmony_ci 37308c2ecf20Sopenharmony_ci if (p_two_primaries != nc->two_primaries) { 37318c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries"); 37328c2ecf20Sopenharmony_ci goto 
disconnect_rcu_unlock; 37338c2ecf20Sopenharmony_ci } 37348c2ecf20Sopenharmony_ci 37358c2ecf20Sopenharmony_ci if (strcmp(integrity_alg, nc->integrity_alg)) { 37368c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg"); 37378c2ecf20Sopenharmony_ci goto disconnect_rcu_unlock; 37388c2ecf20Sopenharmony_ci } 37398c2ecf20Sopenharmony_ci 37408c2ecf20Sopenharmony_ci rcu_read_unlock(); 37418c2ecf20Sopenharmony_ci } 37428c2ecf20Sopenharmony_ci 37438c2ecf20Sopenharmony_ci if (integrity_alg[0]) { 37448c2ecf20Sopenharmony_ci int hash_size; 37458c2ecf20Sopenharmony_ci 37468c2ecf20Sopenharmony_ci /* 37478c2ecf20Sopenharmony_ci * We can only change the peer data integrity algorithm 37488c2ecf20Sopenharmony_ci * here. Changing our own data integrity algorithm 37498c2ecf20Sopenharmony_ci * requires that we send a P_PROTOCOL_UPDATE packet at 37508c2ecf20Sopenharmony_ci * the same time; otherwise, the peer has no way to 37518c2ecf20Sopenharmony_ci * tell between which packets the algorithm should 37528c2ecf20Sopenharmony_ci * change. 
37538c2ecf20Sopenharmony_ci */ 37548c2ecf20Sopenharmony_ci 37558c2ecf20Sopenharmony_ci peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0); 37568c2ecf20Sopenharmony_ci if (IS_ERR(peer_integrity_tfm)) { 37578c2ecf20Sopenharmony_ci peer_integrity_tfm = NULL; 37588c2ecf20Sopenharmony_ci drbd_err(connection, "peer data-integrity-alg %s not supported\n", 37598c2ecf20Sopenharmony_ci integrity_alg); 37608c2ecf20Sopenharmony_ci goto disconnect; 37618c2ecf20Sopenharmony_ci } 37628c2ecf20Sopenharmony_ci 37638c2ecf20Sopenharmony_ci hash_size = crypto_shash_digestsize(peer_integrity_tfm); 37648c2ecf20Sopenharmony_ci int_dig_in = kmalloc(hash_size, GFP_KERNEL); 37658c2ecf20Sopenharmony_ci int_dig_vv = kmalloc(hash_size, GFP_KERNEL); 37668c2ecf20Sopenharmony_ci if (!(int_dig_in && int_dig_vv)) { 37678c2ecf20Sopenharmony_ci drbd_err(connection, "Allocation of buffers for data integrity checking failed\n"); 37688c2ecf20Sopenharmony_ci goto disconnect; 37698c2ecf20Sopenharmony_ci } 37708c2ecf20Sopenharmony_ci } 37718c2ecf20Sopenharmony_ci 37728c2ecf20Sopenharmony_ci new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); 37738c2ecf20Sopenharmony_ci if (!new_net_conf) { 37748c2ecf20Sopenharmony_ci drbd_err(connection, "Allocation of new net_conf failed\n"); 37758c2ecf20Sopenharmony_ci goto disconnect; 37768c2ecf20Sopenharmony_ci } 37778c2ecf20Sopenharmony_ci 37788c2ecf20Sopenharmony_ci mutex_lock(&connection->data.mutex); 37798c2ecf20Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 37808c2ecf20Sopenharmony_ci old_net_conf = connection->net_conf; 37818c2ecf20Sopenharmony_ci *new_net_conf = *old_net_conf; 37828c2ecf20Sopenharmony_ci 37838c2ecf20Sopenharmony_ci new_net_conf->wire_protocol = p_proto; 37848c2ecf20Sopenharmony_ci new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); 37858c2ecf20Sopenharmony_ci new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); 37868c2ecf20Sopenharmony_ci new_net_conf->after_sb_2p = 
convert_after_sb(p_after_sb_2p); 37878c2ecf20Sopenharmony_ci new_net_conf->two_primaries = p_two_primaries; 37888c2ecf20Sopenharmony_ci 37898c2ecf20Sopenharmony_ci rcu_assign_pointer(connection->net_conf, new_net_conf); 37908c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 37918c2ecf20Sopenharmony_ci mutex_unlock(&connection->data.mutex); 37928c2ecf20Sopenharmony_ci 37938c2ecf20Sopenharmony_ci crypto_free_shash(connection->peer_integrity_tfm); 37948c2ecf20Sopenharmony_ci kfree(connection->int_dig_in); 37958c2ecf20Sopenharmony_ci kfree(connection->int_dig_vv); 37968c2ecf20Sopenharmony_ci connection->peer_integrity_tfm = peer_integrity_tfm; 37978c2ecf20Sopenharmony_ci connection->int_dig_in = int_dig_in; 37988c2ecf20Sopenharmony_ci connection->int_dig_vv = int_dig_vv; 37998c2ecf20Sopenharmony_ci 38008c2ecf20Sopenharmony_ci if (strcmp(old_net_conf->integrity_alg, integrity_alg)) 38018c2ecf20Sopenharmony_ci drbd_info(connection, "peer data-integrity-alg: %s\n", 38028c2ecf20Sopenharmony_ci integrity_alg[0] ? 
integrity_alg : "(none)"); 38038c2ecf20Sopenharmony_ci 38048c2ecf20Sopenharmony_ci synchronize_rcu(); 38058c2ecf20Sopenharmony_ci kfree(old_net_conf); 38068c2ecf20Sopenharmony_ci return 0; 38078c2ecf20Sopenharmony_ci 38088c2ecf20Sopenharmony_cidisconnect_rcu_unlock: 38098c2ecf20Sopenharmony_ci rcu_read_unlock(); 38108c2ecf20Sopenharmony_cidisconnect: 38118c2ecf20Sopenharmony_ci crypto_free_shash(peer_integrity_tfm); 38128c2ecf20Sopenharmony_ci kfree(int_dig_in); 38138c2ecf20Sopenharmony_ci kfree(int_dig_vv); 38148c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 38158c2ecf20Sopenharmony_ci return -EIO; 38168c2ecf20Sopenharmony_ci} 38178c2ecf20Sopenharmony_ci 38188c2ecf20Sopenharmony_ci/* helper function 38198c2ecf20Sopenharmony_ci * input: alg name, feature name 38208c2ecf20Sopenharmony_ci * return: NULL (alg name was "") 38218c2ecf20Sopenharmony_ci * ERR_PTR(error) if something goes wrong 38228c2ecf20Sopenharmony_ci * or the crypto hash ptr, if it worked out ok. 
*/ 38238c2ecf20Sopenharmony_cistatic struct crypto_shash *drbd_crypto_alloc_digest_safe( 38248c2ecf20Sopenharmony_ci const struct drbd_device *device, 38258c2ecf20Sopenharmony_ci const char *alg, const char *name) 38268c2ecf20Sopenharmony_ci{ 38278c2ecf20Sopenharmony_ci struct crypto_shash *tfm; 38288c2ecf20Sopenharmony_ci 38298c2ecf20Sopenharmony_ci if (!alg[0]) 38308c2ecf20Sopenharmony_ci return NULL; 38318c2ecf20Sopenharmony_ci 38328c2ecf20Sopenharmony_ci tfm = crypto_alloc_shash(alg, 0, 0); 38338c2ecf20Sopenharmony_ci if (IS_ERR(tfm)) { 38348c2ecf20Sopenharmony_ci drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", 38358c2ecf20Sopenharmony_ci alg, name, PTR_ERR(tfm)); 38368c2ecf20Sopenharmony_ci return tfm; 38378c2ecf20Sopenharmony_ci } 38388c2ecf20Sopenharmony_ci return tfm; 38398c2ecf20Sopenharmony_ci} 38408c2ecf20Sopenharmony_ci 38418c2ecf20Sopenharmony_cistatic int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi) 38428c2ecf20Sopenharmony_ci{ 38438c2ecf20Sopenharmony_ci void *buffer = connection->data.rbuf; 38448c2ecf20Sopenharmony_ci int size = pi->size; 38458c2ecf20Sopenharmony_ci 38468c2ecf20Sopenharmony_ci while (size) { 38478c2ecf20Sopenharmony_ci int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); 38488c2ecf20Sopenharmony_ci s = drbd_recv(connection, buffer, s); 38498c2ecf20Sopenharmony_ci if (s <= 0) { 38508c2ecf20Sopenharmony_ci if (s < 0) 38518c2ecf20Sopenharmony_ci return s; 38528c2ecf20Sopenharmony_ci break; 38538c2ecf20Sopenharmony_ci } 38548c2ecf20Sopenharmony_ci size -= s; 38558c2ecf20Sopenharmony_ci } 38568c2ecf20Sopenharmony_ci if (size) 38578c2ecf20Sopenharmony_ci return -EIO; 38588c2ecf20Sopenharmony_ci return 0; 38598c2ecf20Sopenharmony_ci} 38608c2ecf20Sopenharmony_ci 38618c2ecf20Sopenharmony_ci/* 38628c2ecf20Sopenharmony_ci * config_unknown_volume - device configuration command for unknown volume 38638c2ecf20Sopenharmony_ci * 38648c2ecf20Sopenharmony_ci * When a device is added to an 
existing connection, the node on which the 38658c2ecf20Sopenharmony_ci * device is added first will send configuration commands to its peer but the 38668c2ecf20Sopenharmony_ci * peer will not know about the device yet. It will warn and ignore these 38678c2ecf20Sopenharmony_ci * commands. Once the device is added on the second node, the second node will 38688c2ecf20Sopenharmony_ci * send the same device configuration commands, but in the other direction. 38698c2ecf20Sopenharmony_ci * 38708c2ecf20Sopenharmony_ci * (We can also end up here if drbd is misconfigured.) 38718c2ecf20Sopenharmony_ci */ 38728c2ecf20Sopenharmony_cistatic int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi) 38738c2ecf20Sopenharmony_ci{ 38748c2ecf20Sopenharmony_ci drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n", 38758c2ecf20Sopenharmony_ci cmdname(pi->cmd), pi->vnr); 38768c2ecf20Sopenharmony_ci return ignore_remaining_packet(connection, pi); 38778c2ecf20Sopenharmony_ci} 38788c2ecf20Sopenharmony_ci 38798c2ecf20Sopenharmony_cistatic int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi) 38808c2ecf20Sopenharmony_ci{ 38818c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 38828c2ecf20Sopenharmony_ci struct drbd_device *device; 38838c2ecf20Sopenharmony_ci struct p_rs_param_95 *p; 38848c2ecf20Sopenharmony_ci unsigned int header_size, data_size, exp_max_sz; 38858c2ecf20Sopenharmony_ci struct crypto_shash *verify_tfm = NULL; 38868c2ecf20Sopenharmony_ci struct crypto_shash *csums_tfm = NULL; 38878c2ecf20Sopenharmony_ci struct net_conf *old_net_conf, *new_net_conf = NULL; 38888c2ecf20Sopenharmony_ci struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; 38898c2ecf20Sopenharmony_ci const int apv = connection->agreed_pro_version; 38908c2ecf20Sopenharmony_ci struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 38918c2ecf20Sopenharmony_ci unsigned int fifo_size = 0; 
38928c2ecf20Sopenharmony_ci int err; 38938c2ecf20Sopenharmony_ci 38948c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 38958c2ecf20Sopenharmony_ci if (!peer_device) 38968c2ecf20Sopenharmony_ci return config_unknown_volume(connection, pi); 38978c2ecf20Sopenharmony_ci device = peer_device->device; 38988c2ecf20Sopenharmony_ci 38998c2ecf20Sopenharmony_ci exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 39008c2ecf20Sopenharmony_ci : apv == 88 ? sizeof(struct p_rs_param) 39018c2ecf20Sopenharmony_ci + SHARED_SECRET_MAX 39028c2ecf20Sopenharmony_ci : apv <= 94 ? sizeof(struct p_rs_param_89) 39038c2ecf20Sopenharmony_ci : /* apv >= 95 */ sizeof(struct p_rs_param_95); 39048c2ecf20Sopenharmony_ci 39058c2ecf20Sopenharmony_ci if (pi->size > exp_max_sz) { 39068c2ecf20Sopenharmony_ci drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n", 39078c2ecf20Sopenharmony_ci pi->size, exp_max_sz); 39088c2ecf20Sopenharmony_ci return -EIO; 39098c2ecf20Sopenharmony_ci } 39108c2ecf20Sopenharmony_ci 39118c2ecf20Sopenharmony_ci if (apv <= 88) { 39128c2ecf20Sopenharmony_ci header_size = sizeof(struct p_rs_param); 39138c2ecf20Sopenharmony_ci data_size = pi->size - header_size; 39148c2ecf20Sopenharmony_ci } else if (apv <= 94) { 39158c2ecf20Sopenharmony_ci header_size = sizeof(struct p_rs_param_89); 39168c2ecf20Sopenharmony_ci data_size = pi->size - header_size; 39178c2ecf20Sopenharmony_ci D_ASSERT(device, data_size == 0); 39188c2ecf20Sopenharmony_ci } else { 39198c2ecf20Sopenharmony_ci header_size = sizeof(struct p_rs_param_95); 39208c2ecf20Sopenharmony_ci data_size = pi->size - header_size; 39218c2ecf20Sopenharmony_ci D_ASSERT(device, data_size == 0); 39228c2ecf20Sopenharmony_ci } 39238c2ecf20Sopenharmony_ci 39248c2ecf20Sopenharmony_ci /* initialize verify_alg and csums_alg */ 39258c2ecf20Sopenharmony_ci p = pi->data; 39268c2ecf20Sopenharmony_ci memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 39278c2ecf20Sopenharmony_ci 
39288c2ecf20Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p, header_size); 39298c2ecf20Sopenharmony_ci if (err) 39308c2ecf20Sopenharmony_ci return err; 39318c2ecf20Sopenharmony_ci 39328c2ecf20Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 39338c2ecf20Sopenharmony_ci old_net_conf = peer_device->connection->net_conf; 39348c2ecf20Sopenharmony_ci if (get_ldev(device)) { 39358c2ecf20Sopenharmony_ci new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 39368c2ecf20Sopenharmony_ci if (!new_disk_conf) { 39378c2ecf20Sopenharmony_ci put_ldev(device); 39388c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 39398c2ecf20Sopenharmony_ci drbd_err(device, "Allocation of new disk_conf failed\n"); 39408c2ecf20Sopenharmony_ci return -ENOMEM; 39418c2ecf20Sopenharmony_ci } 39428c2ecf20Sopenharmony_ci 39438c2ecf20Sopenharmony_ci old_disk_conf = device->ldev->disk_conf; 39448c2ecf20Sopenharmony_ci *new_disk_conf = *old_disk_conf; 39458c2ecf20Sopenharmony_ci 39468c2ecf20Sopenharmony_ci new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); 39478c2ecf20Sopenharmony_ci } 39488c2ecf20Sopenharmony_ci 39498c2ecf20Sopenharmony_ci if (apv >= 88) { 39508c2ecf20Sopenharmony_ci if (apv == 88) { 39518c2ecf20Sopenharmony_ci if (data_size > SHARED_SECRET_MAX || data_size == 0) { 39528c2ecf20Sopenharmony_ci drbd_err(device, "verify-alg of wrong size, " 39538c2ecf20Sopenharmony_ci "peer wants %u, accepting only up to %u byte\n", 39548c2ecf20Sopenharmony_ci data_size, SHARED_SECRET_MAX); 39558c2ecf20Sopenharmony_ci err = -EIO; 39568c2ecf20Sopenharmony_ci goto reconnect; 39578c2ecf20Sopenharmony_ci } 39588c2ecf20Sopenharmony_ci 39598c2ecf20Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size); 39608c2ecf20Sopenharmony_ci if (err) 39618c2ecf20Sopenharmony_ci goto reconnect; 39628c2ecf20Sopenharmony_ci /* we expect NUL terminated string */ 39638c2ecf20Sopenharmony_ci /* but just in case someone tries to be 
evil */ 39648c2ecf20Sopenharmony_ci D_ASSERT(device, p->verify_alg[data_size-1] == 0); 39658c2ecf20Sopenharmony_ci p->verify_alg[data_size-1] = 0; 39668c2ecf20Sopenharmony_ci 39678c2ecf20Sopenharmony_ci } else /* apv >= 89 */ { 39688c2ecf20Sopenharmony_ci /* we still expect NUL terminated strings */ 39698c2ecf20Sopenharmony_ci /* but just in case someone tries to be evil */ 39708c2ecf20Sopenharmony_ci D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0); 39718c2ecf20Sopenharmony_ci D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0); 39728c2ecf20Sopenharmony_ci p->verify_alg[SHARED_SECRET_MAX-1] = 0; 39738c2ecf20Sopenharmony_ci p->csums_alg[SHARED_SECRET_MAX-1] = 0; 39748c2ecf20Sopenharmony_ci } 39758c2ecf20Sopenharmony_ci 39768c2ecf20Sopenharmony_ci if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { 39778c2ecf20Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) { 39788c2ecf20Sopenharmony_ci drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", 39798c2ecf20Sopenharmony_ci old_net_conf->verify_alg, p->verify_alg); 39808c2ecf20Sopenharmony_ci goto disconnect; 39818c2ecf20Sopenharmony_ci } 39828c2ecf20Sopenharmony_ci verify_tfm = drbd_crypto_alloc_digest_safe(device, 39838c2ecf20Sopenharmony_ci p->verify_alg, "verify-alg"); 39848c2ecf20Sopenharmony_ci if (IS_ERR(verify_tfm)) { 39858c2ecf20Sopenharmony_ci verify_tfm = NULL; 39868c2ecf20Sopenharmony_ci goto disconnect; 39878c2ecf20Sopenharmony_ci } 39888c2ecf20Sopenharmony_ci } 39898c2ecf20Sopenharmony_ci 39908c2ecf20Sopenharmony_ci if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { 39918c2ecf20Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) { 39928c2ecf20Sopenharmony_ci drbd_err(device, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", 39938c2ecf20Sopenharmony_ci old_net_conf->csums_alg, p->csums_alg); 39948c2ecf20Sopenharmony_ci goto disconnect; 39958c2ecf20Sopenharmony_ci } 39968c2ecf20Sopenharmony_ci csums_tfm = drbd_crypto_alloc_digest_safe(device, 39978c2ecf20Sopenharmony_ci p->csums_alg, "csums-alg"); 39988c2ecf20Sopenharmony_ci if (IS_ERR(csums_tfm)) { 39998c2ecf20Sopenharmony_ci csums_tfm = NULL; 40008c2ecf20Sopenharmony_ci goto disconnect; 40018c2ecf20Sopenharmony_ci } 40028c2ecf20Sopenharmony_ci } 40038c2ecf20Sopenharmony_ci 40048c2ecf20Sopenharmony_ci if (apv > 94 && new_disk_conf) { 40058c2ecf20Sopenharmony_ci new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); 40068c2ecf20Sopenharmony_ci new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); 40078c2ecf20Sopenharmony_ci new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); 40088c2ecf20Sopenharmony_ci new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); 40098c2ecf20Sopenharmony_ci 40108c2ecf20Sopenharmony_ci fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; 40118c2ecf20Sopenharmony_ci if (fifo_size != device->rs_plan_s->size) { 40128c2ecf20Sopenharmony_ci new_plan = fifo_alloc(fifo_size); 40138c2ecf20Sopenharmony_ci if (!new_plan) { 40148c2ecf20Sopenharmony_ci drbd_err(device, "kmalloc of fifo_buffer failed"); 40158c2ecf20Sopenharmony_ci put_ldev(device); 40168c2ecf20Sopenharmony_ci goto disconnect; 40178c2ecf20Sopenharmony_ci } 40188c2ecf20Sopenharmony_ci } 40198c2ecf20Sopenharmony_ci } 40208c2ecf20Sopenharmony_ci 40218c2ecf20Sopenharmony_ci if (verify_tfm || csums_tfm) { 40228c2ecf20Sopenharmony_ci new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 40238c2ecf20Sopenharmony_ci if (!new_net_conf) { 40248c2ecf20Sopenharmony_ci drbd_err(device, "Allocation of new net_conf failed\n"); 40258c2ecf20Sopenharmony_ci goto disconnect; 40268c2ecf20Sopenharmony_ci } 40278c2ecf20Sopenharmony_ci 40288c2ecf20Sopenharmony_ci *new_net_conf = *old_net_conf; 
40298c2ecf20Sopenharmony_ci 40308c2ecf20Sopenharmony_ci if (verify_tfm) { 40318c2ecf20Sopenharmony_ci strcpy(new_net_conf->verify_alg, p->verify_alg); 40328c2ecf20Sopenharmony_ci new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; 40338c2ecf20Sopenharmony_ci crypto_free_shash(peer_device->connection->verify_tfm); 40348c2ecf20Sopenharmony_ci peer_device->connection->verify_tfm = verify_tfm; 40358c2ecf20Sopenharmony_ci drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); 40368c2ecf20Sopenharmony_ci } 40378c2ecf20Sopenharmony_ci if (csums_tfm) { 40388c2ecf20Sopenharmony_ci strcpy(new_net_conf->csums_alg, p->csums_alg); 40398c2ecf20Sopenharmony_ci new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; 40408c2ecf20Sopenharmony_ci crypto_free_shash(peer_device->connection->csums_tfm); 40418c2ecf20Sopenharmony_ci peer_device->connection->csums_tfm = csums_tfm; 40428c2ecf20Sopenharmony_ci drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); 40438c2ecf20Sopenharmony_ci } 40448c2ecf20Sopenharmony_ci rcu_assign_pointer(connection->net_conf, new_net_conf); 40458c2ecf20Sopenharmony_ci } 40468c2ecf20Sopenharmony_ci } 40478c2ecf20Sopenharmony_ci 40488c2ecf20Sopenharmony_ci if (new_disk_conf) { 40498c2ecf20Sopenharmony_ci rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 40508c2ecf20Sopenharmony_ci put_ldev(device); 40518c2ecf20Sopenharmony_ci } 40528c2ecf20Sopenharmony_ci 40538c2ecf20Sopenharmony_ci if (new_plan) { 40548c2ecf20Sopenharmony_ci old_plan = device->rs_plan_s; 40558c2ecf20Sopenharmony_ci rcu_assign_pointer(device->rs_plan_s, new_plan); 40568c2ecf20Sopenharmony_ci } 40578c2ecf20Sopenharmony_ci 40588c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 40598c2ecf20Sopenharmony_ci synchronize_rcu(); 40608c2ecf20Sopenharmony_ci if (new_net_conf) 40618c2ecf20Sopenharmony_ci kfree(old_net_conf); 40628c2ecf20Sopenharmony_ci kfree(old_disk_conf); 40638c2ecf20Sopenharmony_ci kfree(old_plan); 40648c2ecf20Sopenharmony_ci 
40658c2ecf20Sopenharmony_ci return 0; 40668c2ecf20Sopenharmony_ci 40678c2ecf20Sopenharmony_cireconnect: 40688c2ecf20Sopenharmony_ci if (new_disk_conf) { 40698c2ecf20Sopenharmony_ci put_ldev(device); 40708c2ecf20Sopenharmony_ci kfree(new_disk_conf); 40718c2ecf20Sopenharmony_ci } 40728c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 40738c2ecf20Sopenharmony_ci return -EIO; 40748c2ecf20Sopenharmony_ci 40758c2ecf20Sopenharmony_cidisconnect: 40768c2ecf20Sopenharmony_ci kfree(new_plan); 40778c2ecf20Sopenharmony_ci if (new_disk_conf) { 40788c2ecf20Sopenharmony_ci put_ldev(device); 40798c2ecf20Sopenharmony_ci kfree(new_disk_conf); 40808c2ecf20Sopenharmony_ci } 40818c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 40828c2ecf20Sopenharmony_ci /* just for completeness: actually not needed, 40838c2ecf20Sopenharmony_ci * as this is not reached if csums_tfm was ok. */ 40848c2ecf20Sopenharmony_ci crypto_free_shash(csums_tfm); 40858c2ecf20Sopenharmony_ci /* but free the verify_tfm again, if csums_tfm did not work out */ 40868c2ecf20Sopenharmony_ci crypto_free_shash(verify_tfm); 40878c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 40888c2ecf20Sopenharmony_ci return -EIO; 40898c2ecf20Sopenharmony_ci} 40908c2ecf20Sopenharmony_ci 40918c2ecf20Sopenharmony_ci/* warn if the arguments differ by more than 12.5% */ 40928c2ecf20Sopenharmony_cistatic void warn_if_differ_considerably(struct drbd_device *device, 40938c2ecf20Sopenharmony_ci const char *s, sector_t a, sector_t b) 40948c2ecf20Sopenharmony_ci{ 40958c2ecf20Sopenharmony_ci sector_t d; 40968c2ecf20Sopenharmony_ci if (a == 0 || b == 0) 40978c2ecf20Sopenharmony_ci return; 40988c2ecf20Sopenharmony_ci d = (a > b) ? (a - b) : (b - a); 40998c2ecf20Sopenharmony_ci if (d > (a>>3) || d > (b>>3)) 41008c2ecf20Sopenharmony_ci drbd_warn(device, "Considerable difference in %s: %llus vs. 
%llus\n", s, 41018c2ecf20Sopenharmony_ci (unsigned long long)a, (unsigned long long)b); 41028c2ecf20Sopenharmony_ci} 41038c2ecf20Sopenharmony_ci 41048c2ecf20Sopenharmony_cistatic int receive_sizes(struct drbd_connection *connection, struct packet_info *pi) 41058c2ecf20Sopenharmony_ci{ 41068c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 41078c2ecf20Sopenharmony_ci struct drbd_device *device; 41088c2ecf20Sopenharmony_ci struct p_sizes *p = pi->data; 41098c2ecf20Sopenharmony_ci struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; 41108c2ecf20Sopenharmony_ci enum determine_dev_size dd = DS_UNCHANGED; 41118c2ecf20Sopenharmony_ci sector_t p_size, p_usize, p_csize, my_usize; 41128c2ecf20Sopenharmony_ci sector_t new_size, cur_size; 41138c2ecf20Sopenharmony_ci int ldsc = 0; /* local disk size changed */ 41148c2ecf20Sopenharmony_ci enum dds_flags ddsf; 41158c2ecf20Sopenharmony_ci 41168c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 41178c2ecf20Sopenharmony_ci if (!peer_device) 41188c2ecf20Sopenharmony_ci return config_unknown_volume(connection, pi); 41198c2ecf20Sopenharmony_ci device = peer_device->device; 41208c2ecf20Sopenharmony_ci cur_size = get_capacity(device->vdisk); 41218c2ecf20Sopenharmony_ci 41228c2ecf20Sopenharmony_ci p_size = be64_to_cpu(p->d_size); 41238c2ecf20Sopenharmony_ci p_usize = be64_to_cpu(p->u_size); 41248c2ecf20Sopenharmony_ci p_csize = be64_to_cpu(p->c_size); 41258c2ecf20Sopenharmony_ci 41268c2ecf20Sopenharmony_ci /* just store the peer's disk size for now. 41278c2ecf20Sopenharmony_ci * we still need to figure out whether we accept that. 
*/ 41288c2ecf20Sopenharmony_ci device->p_size = p_size; 41298c2ecf20Sopenharmony_ci 41308c2ecf20Sopenharmony_ci if (get_ldev(device)) { 41318c2ecf20Sopenharmony_ci rcu_read_lock(); 41328c2ecf20Sopenharmony_ci my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 41338c2ecf20Sopenharmony_ci rcu_read_unlock(); 41348c2ecf20Sopenharmony_ci 41358c2ecf20Sopenharmony_ci warn_if_differ_considerably(device, "lower level device sizes", 41368c2ecf20Sopenharmony_ci p_size, drbd_get_max_capacity(device->ldev)); 41378c2ecf20Sopenharmony_ci warn_if_differ_considerably(device, "user requested size", 41388c2ecf20Sopenharmony_ci p_usize, my_usize); 41398c2ecf20Sopenharmony_ci 41408c2ecf20Sopenharmony_ci /* if this is the first connect, or an otherwise expected 41418c2ecf20Sopenharmony_ci * param exchange, choose the minimum */ 41428c2ecf20Sopenharmony_ci if (device->state.conn == C_WF_REPORT_PARAMS) 41438c2ecf20Sopenharmony_ci p_usize = min_not_zero(my_usize, p_usize); 41448c2ecf20Sopenharmony_ci 41458c2ecf20Sopenharmony_ci /* Never shrink a device with usable data during connect, 41468c2ecf20Sopenharmony_ci * or "attach" on the peer. 41478c2ecf20Sopenharmony_ci * But allow online shrinking if we are connected. */ 41488c2ecf20Sopenharmony_ci new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); 41498c2ecf20Sopenharmony_ci if (new_size < cur_size && 41508c2ecf20Sopenharmony_ci device->state.disk >= D_OUTDATED && 41518c2ecf20Sopenharmony_ci (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) { 41528c2ecf20Sopenharmony_ci drbd_err(device, "The peer's disk size is too small! 
(%llu < %llu sectors)\n", 41538c2ecf20Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 41548c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 41558c2ecf20Sopenharmony_ci put_ldev(device); 41568c2ecf20Sopenharmony_ci return -EIO; 41578c2ecf20Sopenharmony_ci } 41588c2ecf20Sopenharmony_ci 41598c2ecf20Sopenharmony_ci if (my_usize != p_usize) { 41608c2ecf20Sopenharmony_ci struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 41618c2ecf20Sopenharmony_ci 41628c2ecf20Sopenharmony_ci new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 41638c2ecf20Sopenharmony_ci if (!new_disk_conf) { 41648c2ecf20Sopenharmony_ci drbd_err(device, "Allocation of new disk_conf failed\n"); 41658c2ecf20Sopenharmony_ci put_ldev(device); 41668c2ecf20Sopenharmony_ci return -ENOMEM; 41678c2ecf20Sopenharmony_ci } 41688c2ecf20Sopenharmony_ci 41698c2ecf20Sopenharmony_ci mutex_lock(&connection->resource->conf_update); 41708c2ecf20Sopenharmony_ci old_disk_conf = device->ldev->disk_conf; 41718c2ecf20Sopenharmony_ci *new_disk_conf = *old_disk_conf; 41728c2ecf20Sopenharmony_ci new_disk_conf->disk_size = p_usize; 41738c2ecf20Sopenharmony_ci 41748c2ecf20Sopenharmony_ci rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 41758c2ecf20Sopenharmony_ci mutex_unlock(&connection->resource->conf_update); 41768c2ecf20Sopenharmony_ci synchronize_rcu(); 41778c2ecf20Sopenharmony_ci kfree(old_disk_conf); 41788c2ecf20Sopenharmony_ci 41798c2ecf20Sopenharmony_ci drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n", 41808c2ecf20Sopenharmony_ci (unsigned long)p_usize, (unsigned long)my_usize); 41818c2ecf20Sopenharmony_ci } 41828c2ecf20Sopenharmony_ci 41838c2ecf20Sopenharmony_ci put_ldev(device); 41848c2ecf20Sopenharmony_ci } 41858c2ecf20Sopenharmony_ci 41868c2ecf20Sopenharmony_ci device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); 41878c2ecf20Sopenharmony_ci /* Leave drbd_reconsider_queue_parameters() before 
drbd_determine_dev_size(). 41888c2ecf20Sopenharmony_ci In case we cleared the QUEUE_FLAG_DISCARD from our queue in 41898c2ecf20Sopenharmony_ci drbd_reconsider_queue_parameters(), we can be sure that after 41908c2ecf20Sopenharmony_ci drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ 41918c2ecf20Sopenharmony_ci 41928c2ecf20Sopenharmony_ci ddsf = be16_to_cpu(p->dds_flags); 41938c2ecf20Sopenharmony_ci if (get_ldev(device)) { 41948c2ecf20Sopenharmony_ci drbd_reconsider_queue_parameters(device, device->ldev, o); 41958c2ecf20Sopenharmony_ci dd = drbd_determine_dev_size(device, ddsf, NULL); 41968c2ecf20Sopenharmony_ci put_ldev(device); 41978c2ecf20Sopenharmony_ci if (dd == DS_ERROR) 41988c2ecf20Sopenharmony_ci return -EIO; 41998c2ecf20Sopenharmony_ci drbd_md_sync(device); 42008c2ecf20Sopenharmony_ci } else { 42018c2ecf20Sopenharmony_ci /* 42028c2ecf20Sopenharmony_ci * I am diskless, need to accept the peer's *current* size. 42038c2ecf20Sopenharmony_ci * I must NOT accept the peers backing disk size, 42048c2ecf20Sopenharmony_ci * it may have been larger than mine all along... 42058c2ecf20Sopenharmony_ci * 42068c2ecf20Sopenharmony_ci * At this point, the peer knows more about my disk, or at 42078c2ecf20Sopenharmony_ci * least about what we last agreed upon, than myself. 42088c2ecf20Sopenharmony_ci * So if his c_size is less than his d_size, the most likely 42098c2ecf20Sopenharmony_ci * reason is that *my* d_size was smaller last time we checked. 42108c2ecf20Sopenharmony_ci * 42118c2ecf20Sopenharmony_ci * However, if he sends a zero current size, 42128c2ecf20Sopenharmony_ci * take his (user-capped or) backing disk size anyways. 42138c2ecf20Sopenharmony_ci * 42148c2ecf20Sopenharmony_ci * Unless of course he does not have a disk himself. 42158c2ecf20Sopenharmony_ci * In which case we ignore this completely. 
42168c2ecf20Sopenharmony_ci */ 42178c2ecf20Sopenharmony_ci sector_t new_size = p_csize ?: p_usize ?: p_size; 42188c2ecf20Sopenharmony_ci drbd_reconsider_queue_parameters(device, NULL, o); 42198c2ecf20Sopenharmony_ci if (new_size == 0) { 42208c2ecf20Sopenharmony_ci /* Ignore, peer does not know nothing. */ 42218c2ecf20Sopenharmony_ci } else if (new_size == cur_size) { 42228c2ecf20Sopenharmony_ci /* nothing to do */ 42238c2ecf20Sopenharmony_ci } else if (cur_size != 0 && p_size == 0) { 42248c2ecf20Sopenharmony_ci drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n", 42258c2ecf20Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 42268c2ecf20Sopenharmony_ci } else if (new_size < cur_size && device->state.role == R_PRIMARY) { 42278c2ecf20Sopenharmony_ci drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n", 42288c2ecf20Sopenharmony_ci (unsigned long long)new_size, (unsigned long long)cur_size); 42298c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 42308c2ecf20Sopenharmony_ci return -EIO; 42318c2ecf20Sopenharmony_ci } else { 42328c2ecf20Sopenharmony_ci /* I believe the peer, if 42338c2ecf20Sopenharmony_ci * - I don't have a current size myself 42348c2ecf20Sopenharmony_ci * - we agree on the size anyways 42358c2ecf20Sopenharmony_ci * - I do have a current size, am Secondary, 42368c2ecf20Sopenharmony_ci * and he has the only disk 42378c2ecf20Sopenharmony_ci * - I do have a current size, am Primary, 42388c2ecf20Sopenharmony_ci * and he has the only disk, 42398c2ecf20Sopenharmony_ci * which is larger than my current size 42408c2ecf20Sopenharmony_ci */ 42418c2ecf20Sopenharmony_ci drbd_set_my_capacity(device, new_size); 42428c2ecf20Sopenharmony_ci } 42438c2ecf20Sopenharmony_ci } 42448c2ecf20Sopenharmony_ci 42458c2ecf20Sopenharmony_ci if (get_ldev(device)) { 42468c2ecf20Sopenharmony_ci if (device->ldev->known_size != 
drbd_get_capacity(device->ldev->backing_bdev)) { 42478c2ecf20Sopenharmony_ci device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 42488c2ecf20Sopenharmony_ci ldsc = 1; 42498c2ecf20Sopenharmony_ci } 42508c2ecf20Sopenharmony_ci 42518c2ecf20Sopenharmony_ci put_ldev(device); 42528c2ecf20Sopenharmony_ci } 42538c2ecf20Sopenharmony_ci 42548c2ecf20Sopenharmony_ci if (device->state.conn > C_WF_REPORT_PARAMS) { 42558c2ecf20Sopenharmony_ci if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) || 42568c2ecf20Sopenharmony_ci ldsc) { 42578c2ecf20Sopenharmony_ci /* we have different sizes, probably peer 42588c2ecf20Sopenharmony_ci * needs to know my new size... */ 42598c2ecf20Sopenharmony_ci drbd_send_sizes(peer_device, 0, ddsf); 42608c2ecf20Sopenharmony_ci } 42618c2ecf20Sopenharmony_ci if (test_and_clear_bit(RESIZE_PENDING, &device->flags) || 42628c2ecf20Sopenharmony_ci (dd == DS_GREW && device->state.conn == C_CONNECTED)) { 42638c2ecf20Sopenharmony_ci if (device->state.pdsk >= D_INCONSISTENT && 42648c2ecf20Sopenharmony_ci device->state.disk >= D_INCONSISTENT) { 42658c2ecf20Sopenharmony_ci if (ddsf & DDSF_NO_RESYNC) 42668c2ecf20Sopenharmony_ci drbd_info(device, "Resync of new storage suppressed with --assume-clean\n"); 42678c2ecf20Sopenharmony_ci else 42688c2ecf20Sopenharmony_ci resync_after_online_grow(device); 42698c2ecf20Sopenharmony_ci } else 42708c2ecf20Sopenharmony_ci set_bit(RESYNC_AFTER_NEG, &device->flags); 42718c2ecf20Sopenharmony_ci } 42728c2ecf20Sopenharmony_ci } 42738c2ecf20Sopenharmony_ci 42748c2ecf20Sopenharmony_ci return 0; 42758c2ecf20Sopenharmony_ci} 42768c2ecf20Sopenharmony_ci 42778c2ecf20Sopenharmony_cistatic int receive_uuids(struct drbd_connection *connection, struct packet_info *pi) 42788c2ecf20Sopenharmony_ci{ 42798c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 42808c2ecf20Sopenharmony_ci struct drbd_device *device; 42818c2ecf20Sopenharmony_ci struct p_uuids *p = pi->data; 42828c2ecf20Sopenharmony_ci u64 *p_uuid; 
42838c2ecf20Sopenharmony_ci int i, updated_uuids = 0; 42848c2ecf20Sopenharmony_ci 42858c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 42868c2ecf20Sopenharmony_ci if (!peer_device) 42878c2ecf20Sopenharmony_ci return config_unknown_volume(connection, pi); 42888c2ecf20Sopenharmony_ci device = peer_device->device; 42898c2ecf20Sopenharmony_ci 42908c2ecf20Sopenharmony_ci p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); 42918c2ecf20Sopenharmony_ci if (!p_uuid) { 42928c2ecf20Sopenharmony_ci drbd_err(device, "kmalloc of p_uuid failed\n"); 42938c2ecf20Sopenharmony_ci return false; 42948c2ecf20Sopenharmony_ci } 42958c2ecf20Sopenharmony_ci 42968c2ecf20Sopenharmony_ci for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 42978c2ecf20Sopenharmony_ci p_uuid[i] = be64_to_cpu(p->uuid[i]); 42988c2ecf20Sopenharmony_ci 42998c2ecf20Sopenharmony_ci kfree(device->p_uuid); 43008c2ecf20Sopenharmony_ci device->p_uuid = p_uuid; 43018c2ecf20Sopenharmony_ci 43028c2ecf20Sopenharmony_ci if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && 43038c2ecf20Sopenharmony_ci device->state.disk < D_INCONSISTENT && 43048c2ecf20Sopenharmony_ci device->state.role == R_PRIMARY && 43058c2ecf20Sopenharmony_ci (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 43068c2ecf20Sopenharmony_ci drbd_err(device, "Can only connect to data with current UUID=%016llX\n", 43078c2ecf20Sopenharmony_ci (unsigned long long)device->ed_uuid); 43088c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 43098c2ecf20Sopenharmony_ci return -EIO; 43108c2ecf20Sopenharmony_ci } 43118c2ecf20Sopenharmony_ci 43128c2ecf20Sopenharmony_ci if (get_ldev(device)) { 43138c2ecf20Sopenharmony_ci int skip_initial_sync = 43148c2ecf20Sopenharmony_ci device->state.conn == C_CONNECTED && 43158c2ecf20Sopenharmony_ci peer_device->connection->agreed_pro_version >= 90 && 43168c2ecf20Sopenharmony_ci 
device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 43178c2ecf20Sopenharmony_ci (p_uuid[UI_FLAGS] & 8); 43188c2ecf20Sopenharmony_ci if (skip_initial_sync) { 43198c2ecf20Sopenharmony_ci drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); 43208c2ecf20Sopenharmony_ci drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 43218c2ecf20Sopenharmony_ci "clear_n_write from receive_uuids", 43228c2ecf20Sopenharmony_ci BM_LOCKED_TEST_ALLOWED); 43238c2ecf20Sopenharmony_ci _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); 43248c2ecf20Sopenharmony_ci _drbd_uuid_set(device, UI_BITMAP, 0); 43258c2ecf20Sopenharmony_ci _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 43268c2ecf20Sopenharmony_ci CS_VERBOSE, NULL); 43278c2ecf20Sopenharmony_ci drbd_md_sync(device); 43288c2ecf20Sopenharmony_ci updated_uuids = 1; 43298c2ecf20Sopenharmony_ci } 43308c2ecf20Sopenharmony_ci put_ldev(device); 43318c2ecf20Sopenharmony_ci } else if (device->state.disk < D_INCONSISTENT && 43328c2ecf20Sopenharmony_ci device->state.role == R_PRIMARY) { 43338c2ecf20Sopenharmony_ci /* I am a diskless primary, the peer just created a new current UUID 43348c2ecf20Sopenharmony_ci for me. */ 43358c2ecf20Sopenharmony_ci updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 43368c2ecf20Sopenharmony_ci } 43378c2ecf20Sopenharmony_ci 43388c2ecf20Sopenharmony_ci /* Before we test for the disk state, we should wait until an eventually 43398c2ecf20Sopenharmony_ci ongoing cluster wide state change is finished. That is important if 43408c2ecf20Sopenharmony_ci we are primary and are detaching from our disk. We need to see the 43418c2ecf20Sopenharmony_ci new disk state... 
*/ 43428c2ecf20Sopenharmony_ci mutex_lock(device->state_mutex); 43438c2ecf20Sopenharmony_ci mutex_unlock(device->state_mutex); 43448c2ecf20Sopenharmony_ci if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) 43458c2ecf20Sopenharmony_ci updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 43468c2ecf20Sopenharmony_ci 43478c2ecf20Sopenharmony_ci if (updated_uuids) 43488c2ecf20Sopenharmony_ci drbd_print_uuids(device, "receiver updated UUIDs to"); 43498c2ecf20Sopenharmony_ci 43508c2ecf20Sopenharmony_ci return 0; 43518c2ecf20Sopenharmony_ci} 43528c2ecf20Sopenharmony_ci 43538c2ecf20Sopenharmony_ci/** 43548c2ecf20Sopenharmony_ci * convert_state() - Converts the peer's view of the cluster state to our point of view 43558c2ecf20Sopenharmony_ci * @ps: The state as seen by the peer. 43568c2ecf20Sopenharmony_ci */ 43578c2ecf20Sopenharmony_cistatic union drbd_state convert_state(union drbd_state ps) 43588c2ecf20Sopenharmony_ci{ 43598c2ecf20Sopenharmony_ci union drbd_state ms; 43608c2ecf20Sopenharmony_ci 43618c2ecf20Sopenharmony_ci static enum drbd_conns c_tab[] = { 43628c2ecf20Sopenharmony_ci [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, 43638c2ecf20Sopenharmony_ci [C_CONNECTED] = C_CONNECTED, 43648c2ecf20Sopenharmony_ci 43658c2ecf20Sopenharmony_ci [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 43668c2ecf20Sopenharmony_ci [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, 43678c2ecf20Sopenharmony_ci [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ 43688c2ecf20Sopenharmony_ci [C_VERIFY_S] = C_VERIFY_T, 43698c2ecf20Sopenharmony_ci [C_MASK] = C_MASK, 43708c2ecf20Sopenharmony_ci }; 43718c2ecf20Sopenharmony_ci 43728c2ecf20Sopenharmony_ci ms.i = ps.i; 43738c2ecf20Sopenharmony_ci 43748c2ecf20Sopenharmony_ci ms.conn = c_tab[ps.conn]; 43758c2ecf20Sopenharmony_ci ms.peer = ps.role; 43768c2ecf20Sopenharmony_ci ms.role = ps.peer; 43778c2ecf20Sopenharmony_ci ms.pdsk = ps.disk; 43788c2ecf20Sopenharmony_ci ms.disk = ps.pdsk; 43798c2ecf20Sopenharmony_ci ms.peer_isp = 
(ps.aftr_isp | ps.user_isp); 43808c2ecf20Sopenharmony_ci 43818c2ecf20Sopenharmony_ci return ms; 43828c2ecf20Sopenharmony_ci} 43838c2ecf20Sopenharmony_ci 43848c2ecf20Sopenharmony_cistatic int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) 43858c2ecf20Sopenharmony_ci{ 43868c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 43878c2ecf20Sopenharmony_ci struct drbd_device *device; 43888c2ecf20Sopenharmony_ci struct p_req_state *p = pi->data; 43898c2ecf20Sopenharmony_ci union drbd_state mask, val; 43908c2ecf20Sopenharmony_ci enum drbd_state_rv rv; 43918c2ecf20Sopenharmony_ci 43928c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 43938c2ecf20Sopenharmony_ci if (!peer_device) 43948c2ecf20Sopenharmony_ci return -EIO; 43958c2ecf20Sopenharmony_ci device = peer_device->device; 43968c2ecf20Sopenharmony_ci 43978c2ecf20Sopenharmony_ci mask.i = be32_to_cpu(p->mask); 43988c2ecf20Sopenharmony_ci val.i = be32_to_cpu(p->val); 43998c2ecf20Sopenharmony_ci 44008c2ecf20Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && 44018c2ecf20Sopenharmony_ci mutex_is_locked(device->state_mutex)) { 44028c2ecf20Sopenharmony_ci drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); 44038c2ecf20Sopenharmony_ci return 0; 44048c2ecf20Sopenharmony_ci } 44058c2ecf20Sopenharmony_ci 44068c2ecf20Sopenharmony_ci mask = convert_state(mask); 44078c2ecf20Sopenharmony_ci val = convert_state(val); 44088c2ecf20Sopenharmony_ci 44098c2ecf20Sopenharmony_ci rv = drbd_change_state(device, CS_VERBOSE, mask, val); 44108c2ecf20Sopenharmony_ci drbd_send_sr_reply(peer_device, rv); 44118c2ecf20Sopenharmony_ci 44128c2ecf20Sopenharmony_ci drbd_md_sync(device); 44138c2ecf20Sopenharmony_ci 44148c2ecf20Sopenharmony_ci return 0; 44158c2ecf20Sopenharmony_ci} 44168c2ecf20Sopenharmony_ci 44178c2ecf20Sopenharmony_cistatic int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi) 44188c2ecf20Sopenharmony_ci{ 
44198c2ecf20Sopenharmony_ci struct p_req_state *p = pi->data; 44208c2ecf20Sopenharmony_ci union drbd_state mask, val; 44218c2ecf20Sopenharmony_ci enum drbd_state_rv rv; 44228c2ecf20Sopenharmony_ci 44238c2ecf20Sopenharmony_ci mask.i = be32_to_cpu(p->mask); 44248c2ecf20Sopenharmony_ci val.i = be32_to_cpu(p->val); 44258c2ecf20Sopenharmony_ci 44268c2ecf20Sopenharmony_ci if (test_bit(RESOLVE_CONFLICTS, &connection->flags) && 44278c2ecf20Sopenharmony_ci mutex_is_locked(&connection->cstate_mutex)) { 44288c2ecf20Sopenharmony_ci conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG); 44298c2ecf20Sopenharmony_ci return 0; 44308c2ecf20Sopenharmony_ci } 44318c2ecf20Sopenharmony_ci 44328c2ecf20Sopenharmony_ci mask = convert_state(mask); 44338c2ecf20Sopenharmony_ci val = convert_state(val); 44348c2ecf20Sopenharmony_ci 44358c2ecf20Sopenharmony_ci rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); 44368c2ecf20Sopenharmony_ci conn_send_sr_reply(connection, rv); 44378c2ecf20Sopenharmony_ci 44388c2ecf20Sopenharmony_ci return 0; 44398c2ecf20Sopenharmony_ci} 44408c2ecf20Sopenharmony_ci 44418c2ecf20Sopenharmony_cistatic int receive_state(struct drbd_connection *connection, struct packet_info *pi) 44428c2ecf20Sopenharmony_ci{ 44438c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 44448c2ecf20Sopenharmony_ci struct drbd_device *device; 44458c2ecf20Sopenharmony_ci struct p_state *p = pi->data; 44468c2ecf20Sopenharmony_ci union drbd_state os, ns, peer_state; 44478c2ecf20Sopenharmony_ci enum drbd_disk_state real_peer_disk; 44488c2ecf20Sopenharmony_ci enum chg_state_flags cs_flags; 44498c2ecf20Sopenharmony_ci int rv; 44508c2ecf20Sopenharmony_ci 44518c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 44528c2ecf20Sopenharmony_ci if (!peer_device) 44538c2ecf20Sopenharmony_ci return config_unknown_volume(connection, pi); 44548c2ecf20Sopenharmony_ci device = peer_device->device; 44558c2ecf20Sopenharmony_ci 
44568c2ecf20Sopenharmony_ci peer_state.i = be32_to_cpu(p->state); 44578c2ecf20Sopenharmony_ci 44588c2ecf20Sopenharmony_ci real_peer_disk = peer_state.disk; 44598c2ecf20Sopenharmony_ci if (peer_state.disk == D_NEGOTIATING) { 44608c2ecf20Sopenharmony_ci real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; 44618c2ecf20Sopenharmony_ci drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); 44628c2ecf20Sopenharmony_ci } 44638c2ecf20Sopenharmony_ci 44648c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 44658c2ecf20Sopenharmony_ci retry: 44668c2ecf20Sopenharmony_ci os = ns = drbd_read_state(device); 44678c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 44688c2ecf20Sopenharmony_ci 44698c2ecf20Sopenharmony_ci /* If some other part of the code (ack_receiver thread, timeout) 44708c2ecf20Sopenharmony_ci * already decided to close the connection again, 44718c2ecf20Sopenharmony_ci * we must not "re-establish" it here. */ 44728c2ecf20Sopenharmony_ci if (os.conn <= C_TEAR_DOWN) 44738c2ecf20Sopenharmony_ci return -ECONNRESET; 44748c2ecf20Sopenharmony_ci 44758c2ecf20Sopenharmony_ci /* If this is the "end of sync" confirmation, usually the peer disk 44768c2ecf20Sopenharmony_ci * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits 44778c2ecf20Sopenharmony_ci * set) resync started in PausedSyncT, or if the timing of pause-/ 44788c2ecf20Sopenharmony_ci * unpause-sync events has been "just right", the peer disk may 44798c2ecf20Sopenharmony_ci * transition from D_CONSISTENT to D_UP_TO_DATE as well. 
44808c2ecf20Sopenharmony_ci */ 44818c2ecf20Sopenharmony_ci if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && 44828c2ecf20Sopenharmony_ci real_peer_disk == D_UP_TO_DATE && 44838c2ecf20Sopenharmony_ci os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { 44848c2ecf20Sopenharmony_ci /* If we are (becoming) SyncSource, but peer is still in sync 44858c2ecf20Sopenharmony_ci * preparation, ignore its uptodate-ness to avoid flapping, it 44868c2ecf20Sopenharmony_ci * will change to inconsistent once the peer reaches active 44878c2ecf20Sopenharmony_ci * syncing states. 44888c2ecf20Sopenharmony_ci * It may have changed syncer-paused flags, however, so we 44898c2ecf20Sopenharmony_ci * cannot ignore this completely. */ 44908c2ecf20Sopenharmony_ci if (peer_state.conn > C_CONNECTED && 44918c2ecf20Sopenharmony_ci peer_state.conn < C_SYNC_SOURCE) 44928c2ecf20Sopenharmony_ci real_peer_disk = D_INCONSISTENT; 44938c2ecf20Sopenharmony_ci 44948c2ecf20Sopenharmony_ci /* if peer_state changes to connected at the same time, 44958c2ecf20Sopenharmony_ci * it explicitly notifies us that it finished resync. 44968c2ecf20Sopenharmony_ci * Maybe we should finish it up, too? */ 44978c2ecf20Sopenharmony_ci else if (os.conn >= C_SYNC_SOURCE && 44988c2ecf20Sopenharmony_ci peer_state.conn == C_CONNECTED) { 44998c2ecf20Sopenharmony_ci if (drbd_bm_total_weight(device) <= device->rs_failed) 45008c2ecf20Sopenharmony_ci drbd_resync_finished(device); 45018c2ecf20Sopenharmony_ci return 0; 45028c2ecf20Sopenharmony_ci } 45038c2ecf20Sopenharmony_ci } 45048c2ecf20Sopenharmony_ci 45058c2ecf20Sopenharmony_ci /* explicit verify finished notification, stop sector reached. 
*/ 45068c2ecf20Sopenharmony_ci if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && 45078c2ecf20Sopenharmony_ci peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { 45088c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 45098c2ecf20Sopenharmony_ci drbd_resync_finished(device); 45108c2ecf20Sopenharmony_ci return 0; 45118c2ecf20Sopenharmony_ci } 45128c2ecf20Sopenharmony_ci 45138c2ecf20Sopenharmony_ci /* peer says his disk is inconsistent, while we think it is uptodate, 45148c2ecf20Sopenharmony_ci * and this happens while the peer still thinks we have a sync going on, 45158c2ecf20Sopenharmony_ci * but we think we are already done with the sync. 45168c2ecf20Sopenharmony_ci * We ignore this to avoid flapping pdsk. 45178c2ecf20Sopenharmony_ci * This should not happen, if the peer is a recent version of drbd. */ 45188c2ecf20Sopenharmony_ci if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT && 45198c2ecf20Sopenharmony_ci os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE) 45208c2ecf20Sopenharmony_ci real_peer_disk = D_UP_TO_DATE; 45218c2ecf20Sopenharmony_ci 45228c2ecf20Sopenharmony_ci if (ns.conn == C_WF_REPORT_PARAMS) 45238c2ecf20Sopenharmony_ci ns.conn = C_CONNECTED; 45248c2ecf20Sopenharmony_ci 45258c2ecf20Sopenharmony_ci if (peer_state.conn == C_AHEAD) 45268c2ecf20Sopenharmony_ci ns.conn = C_BEHIND; 45278c2ecf20Sopenharmony_ci 45288c2ecf20Sopenharmony_ci /* TODO: 45298c2ecf20Sopenharmony_ci * if (primary and diskless and peer uuid != effective uuid) 45308c2ecf20Sopenharmony_ci * abort attach on peer; 45318c2ecf20Sopenharmony_ci * 45328c2ecf20Sopenharmony_ci * If this node does not have good data, was already connected, but 45338c2ecf20Sopenharmony_ci * the peer did a late attach only now, trying to "negotiate" with me, 45348c2ecf20Sopenharmony_ci * AND I am currently Primary, possibly frozen, with some specific 45358c2ecf20Sopenharmony_ci * "effective" uuid, this should never be reached, really, because 
45368c2ecf20Sopenharmony_ci * we first send the uuids, then the current state. 45378c2ecf20Sopenharmony_ci * 45388c2ecf20Sopenharmony_ci * In this scenario, we already dropped the connection hard 45398c2ecf20Sopenharmony_ci * when we received the unsuitable uuids (receive_uuids(). 45408c2ecf20Sopenharmony_ci * 45418c2ecf20Sopenharmony_ci * Should we want to change this, that is: not drop the connection in 45428c2ecf20Sopenharmony_ci * receive_uuids() already, then we would need to add a branch here 45438c2ecf20Sopenharmony_ci * that aborts the attach of "unsuitable uuids" on the peer in case 45448c2ecf20Sopenharmony_ci * this node is currently Diskless Primary. 45458c2ecf20Sopenharmony_ci */ 45468c2ecf20Sopenharmony_ci 45478c2ecf20Sopenharmony_ci if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && 45488c2ecf20Sopenharmony_ci get_ldev_if_state(device, D_NEGOTIATING)) { 45498c2ecf20Sopenharmony_ci int cr; /* consider resync */ 45508c2ecf20Sopenharmony_ci 45518c2ecf20Sopenharmony_ci /* if we established a new connection */ 45528c2ecf20Sopenharmony_ci cr = (os.conn < C_CONNECTED); 45538c2ecf20Sopenharmony_ci /* if we had an established connection 45548c2ecf20Sopenharmony_ci * and one of the nodes newly attaches a disk */ 45558c2ecf20Sopenharmony_ci cr |= (os.conn == C_CONNECTED && 45568c2ecf20Sopenharmony_ci (peer_state.disk == D_NEGOTIATING || 45578c2ecf20Sopenharmony_ci os.disk == D_NEGOTIATING)); 45588c2ecf20Sopenharmony_ci /* if we have both been inconsistent, and the peer has been 45598c2ecf20Sopenharmony_ci * forced to be UpToDate with --force */ 45608c2ecf20Sopenharmony_ci cr |= test_bit(CONSIDER_RESYNC, &device->flags); 45618c2ecf20Sopenharmony_ci /* if we had been plain connected, and the admin requested to 45628c2ecf20Sopenharmony_ci * start a sync by "invalidate" or "invalidate-remote" */ 45638c2ecf20Sopenharmony_ci cr |= (os.conn == C_CONNECTED && 45648c2ecf20Sopenharmony_ci (peer_state.conn >= C_STARTING_SYNC_S && 45658c2ecf20Sopenharmony_ci 
peer_state.conn <= C_WF_BITMAP_T)); 45668c2ecf20Sopenharmony_ci 45678c2ecf20Sopenharmony_ci if (cr) 45688c2ecf20Sopenharmony_ci ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk); 45698c2ecf20Sopenharmony_ci 45708c2ecf20Sopenharmony_ci put_ldev(device); 45718c2ecf20Sopenharmony_ci if (ns.conn == C_MASK) { 45728c2ecf20Sopenharmony_ci ns.conn = C_CONNECTED; 45738c2ecf20Sopenharmony_ci if (device->state.disk == D_NEGOTIATING) { 45748c2ecf20Sopenharmony_ci drbd_force_state(device, NS(disk, D_FAILED)); 45758c2ecf20Sopenharmony_ci } else if (peer_state.disk == D_NEGOTIATING) { 45768c2ecf20Sopenharmony_ci drbd_err(device, "Disk attach process on the peer node was aborted.\n"); 45778c2ecf20Sopenharmony_ci peer_state.disk = D_DISKLESS; 45788c2ecf20Sopenharmony_ci real_peer_disk = D_DISKLESS; 45798c2ecf20Sopenharmony_ci } else { 45808c2ecf20Sopenharmony_ci if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags)) 45818c2ecf20Sopenharmony_ci return -EIO; 45828c2ecf20Sopenharmony_ci D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS); 45838c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 45848c2ecf20Sopenharmony_ci return -EIO; 45858c2ecf20Sopenharmony_ci } 45868c2ecf20Sopenharmony_ci } 45878c2ecf20Sopenharmony_ci } 45888c2ecf20Sopenharmony_ci 45898c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 45908c2ecf20Sopenharmony_ci if (os.i != drbd_read_state(device).i) 45918c2ecf20Sopenharmony_ci goto retry; 45928c2ecf20Sopenharmony_ci clear_bit(CONSIDER_RESYNC, &device->flags); 45938c2ecf20Sopenharmony_ci ns.peer = peer_state.role; 45948c2ecf20Sopenharmony_ci ns.pdsk = real_peer_disk; 45958c2ecf20Sopenharmony_ci ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); 45968c2ecf20Sopenharmony_ci if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) 45978c2ecf20Sopenharmony_ci ns.disk = device->new_state_tmp.disk; 45988c2ecf20Sopenharmony_ci 
cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); 45998c2ecf20Sopenharmony_ci if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && 46008c2ecf20Sopenharmony_ci test_bit(NEW_CUR_UUID, &device->flags)) { 46018c2ecf20Sopenharmony_ci /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this 46028c2ecf20Sopenharmony_ci for temporal network outages! */ 46038c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 46048c2ecf20Sopenharmony_ci drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); 46058c2ecf20Sopenharmony_ci tl_clear(peer_device->connection); 46068c2ecf20Sopenharmony_ci drbd_uuid_new_current(device); 46078c2ecf20Sopenharmony_ci clear_bit(NEW_CUR_UUID, &device->flags); 46088c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); 46098c2ecf20Sopenharmony_ci return -EIO; 46108c2ecf20Sopenharmony_ci } 46118c2ecf20Sopenharmony_ci rv = _drbd_set_state(device, ns, cs_flags, NULL); 46128c2ecf20Sopenharmony_ci ns = drbd_read_state(device); 46138c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 46148c2ecf20Sopenharmony_ci 46158c2ecf20Sopenharmony_ci if (rv < SS_SUCCESS) { 46168c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 46178c2ecf20Sopenharmony_ci return -EIO; 46188c2ecf20Sopenharmony_ci } 46198c2ecf20Sopenharmony_ci 46208c2ecf20Sopenharmony_ci if (os.conn > C_WF_REPORT_PARAMS) { 46218c2ecf20Sopenharmony_ci if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED && 46228c2ecf20Sopenharmony_ci peer_state.disk != D_NEGOTIATING ) { 46238c2ecf20Sopenharmony_ci /* we want resync, peer has not yet decided to sync... 
*/ 46248c2ecf20Sopenharmony_ci /* Nowadays only used when forcing a node into primary role and 46258c2ecf20Sopenharmony_ci setting its disk to UpToDate with that */ 46268c2ecf20Sopenharmony_ci drbd_send_uuids(peer_device); 46278c2ecf20Sopenharmony_ci drbd_send_current_state(peer_device); 46288c2ecf20Sopenharmony_ci } 46298c2ecf20Sopenharmony_ci } 46308c2ecf20Sopenharmony_ci 46318c2ecf20Sopenharmony_ci clear_bit(DISCARD_MY_DATA, &device->flags); 46328c2ecf20Sopenharmony_ci 46338c2ecf20Sopenharmony_ci drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */ 46348c2ecf20Sopenharmony_ci 46358c2ecf20Sopenharmony_ci return 0; 46368c2ecf20Sopenharmony_ci} 46378c2ecf20Sopenharmony_ci 46388c2ecf20Sopenharmony_cistatic int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi) 46398c2ecf20Sopenharmony_ci{ 46408c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 46418c2ecf20Sopenharmony_ci struct drbd_device *device; 46428c2ecf20Sopenharmony_ci struct p_rs_uuid *p = pi->data; 46438c2ecf20Sopenharmony_ci 46448c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 46458c2ecf20Sopenharmony_ci if (!peer_device) 46468c2ecf20Sopenharmony_ci return -EIO; 46478c2ecf20Sopenharmony_ci device = peer_device->device; 46488c2ecf20Sopenharmony_ci 46498c2ecf20Sopenharmony_ci wait_event(device->misc_wait, 46508c2ecf20Sopenharmony_ci device->state.conn == C_WF_SYNC_UUID || 46518c2ecf20Sopenharmony_ci device->state.conn == C_BEHIND || 46528c2ecf20Sopenharmony_ci device->state.conn < C_CONNECTED || 46538c2ecf20Sopenharmony_ci device->state.disk < D_NEGOTIATING); 46548c2ecf20Sopenharmony_ci 46558c2ecf20Sopenharmony_ci /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */ 46568c2ecf20Sopenharmony_ci 46578c2ecf20Sopenharmony_ci /* Here the _drbd_uuid_ functions are right, current should 46588c2ecf20Sopenharmony_ci _not_ be rotated into the history */ 46598c2ecf20Sopenharmony_ci if (get_ldev_if_state(device, 
D_NEGOTIATING)) { 46608c2ecf20Sopenharmony_ci _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid)); 46618c2ecf20Sopenharmony_ci _drbd_uuid_set(device, UI_BITMAP, 0UL); 46628c2ecf20Sopenharmony_ci 46638c2ecf20Sopenharmony_ci drbd_print_uuids(device, "updated sync uuid"); 46648c2ecf20Sopenharmony_ci drbd_start_resync(device, C_SYNC_TARGET); 46658c2ecf20Sopenharmony_ci 46668c2ecf20Sopenharmony_ci put_ldev(device); 46678c2ecf20Sopenharmony_ci } else 46688c2ecf20Sopenharmony_ci drbd_err(device, "Ignoring SyncUUID packet!\n"); 46698c2ecf20Sopenharmony_ci 46708c2ecf20Sopenharmony_ci return 0; 46718c2ecf20Sopenharmony_ci} 46728c2ecf20Sopenharmony_ci 46738c2ecf20Sopenharmony_ci/** 46748c2ecf20Sopenharmony_ci * receive_bitmap_plain 46758c2ecf20Sopenharmony_ci * 46768c2ecf20Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 46778c2ecf20Sopenharmony_ci * code upon failure. 46788c2ecf20Sopenharmony_ci */ 46798c2ecf20Sopenharmony_cistatic int 46808c2ecf20Sopenharmony_cireceive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size, 46818c2ecf20Sopenharmony_ci unsigned long *p, struct bm_xfer_ctx *c) 46828c2ecf20Sopenharmony_ci{ 46838c2ecf20Sopenharmony_ci unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - 46848c2ecf20Sopenharmony_ci drbd_header_size(peer_device->connection); 46858c2ecf20Sopenharmony_ci unsigned int num_words = min_t(size_t, data_size / sizeof(*p), 46868c2ecf20Sopenharmony_ci c->bm_words - c->word_offset); 46878c2ecf20Sopenharmony_ci unsigned int want = num_words * sizeof(*p); 46888c2ecf20Sopenharmony_ci int err; 46898c2ecf20Sopenharmony_ci 46908c2ecf20Sopenharmony_ci if (want != size) { 46918c2ecf20Sopenharmony_ci drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size); 46928c2ecf20Sopenharmony_ci return -EIO; 46938c2ecf20Sopenharmony_ci } 46948c2ecf20Sopenharmony_ci if (want == 0) 46958c2ecf20Sopenharmony_ci return 0; 46968c2ecf20Sopenharmony_ci err = 
drbd_recv_all(peer_device->connection, p, want); 46978c2ecf20Sopenharmony_ci if (err) 46988c2ecf20Sopenharmony_ci return err; 46998c2ecf20Sopenharmony_ci 47008c2ecf20Sopenharmony_ci drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p); 47018c2ecf20Sopenharmony_ci 47028c2ecf20Sopenharmony_ci c->word_offset += num_words; 47038c2ecf20Sopenharmony_ci c->bit_offset = c->word_offset * BITS_PER_LONG; 47048c2ecf20Sopenharmony_ci if (c->bit_offset > c->bm_bits) 47058c2ecf20Sopenharmony_ci c->bit_offset = c->bm_bits; 47068c2ecf20Sopenharmony_ci 47078c2ecf20Sopenharmony_ci return 1; 47088c2ecf20Sopenharmony_ci} 47098c2ecf20Sopenharmony_ci 47108c2ecf20Sopenharmony_cistatic enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p) 47118c2ecf20Sopenharmony_ci{ 47128c2ecf20Sopenharmony_ci return (enum drbd_bitmap_code)(p->encoding & 0x0f); 47138c2ecf20Sopenharmony_ci} 47148c2ecf20Sopenharmony_ci 47158c2ecf20Sopenharmony_cistatic int dcbp_get_start(struct p_compressed_bm *p) 47168c2ecf20Sopenharmony_ci{ 47178c2ecf20Sopenharmony_ci return (p->encoding & 0x80) != 0; 47188c2ecf20Sopenharmony_ci} 47198c2ecf20Sopenharmony_ci 47208c2ecf20Sopenharmony_cistatic int dcbp_get_pad_bits(struct p_compressed_bm *p) 47218c2ecf20Sopenharmony_ci{ 47228c2ecf20Sopenharmony_ci return (p->encoding >> 4) & 0x7; 47238c2ecf20Sopenharmony_ci} 47248c2ecf20Sopenharmony_ci 47258c2ecf20Sopenharmony_ci/** 47268c2ecf20Sopenharmony_ci * recv_bm_rle_bits 47278c2ecf20Sopenharmony_ci * 47288c2ecf20Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 47298c2ecf20Sopenharmony_ci * code upon failure. 
47308c2ecf20Sopenharmony_ci */ 47318c2ecf20Sopenharmony_cistatic int 47328c2ecf20Sopenharmony_cirecv_bm_rle_bits(struct drbd_peer_device *peer_device, 47338c2ecf20Sopenharmony_ci struct p_compressed_bm *p, 47348c2ecf20Sopenharmony_ci struct bm_xfer_ctx *c, 47358c2ecf20Sopenharmony_ci unsigned int len) 47368c2ecf20Sopenharmony_ci{ 47378c2ecf20Sopenharmony_ci struct bitstream bs; 47388c2ecf20Sopenharmony_ci u64 look_ahead; 47398c2ecf20Sopenharmony_ci u64 rl; 47408c2ecf20Sopenharmony_ci u64 tmp; 47418c2ecf20Sopenharmony_ci unsigned long s = c->bit_offset; 47428c2ecf20Sopenharmony_ci unsigned long e; 47438c2ecf20Sopenharmony_ci int toggle = dcbp_get_start(p); 47448c2ecf20Sopenharmony_ci int have; 47458c2ecf20Sopenharmony_ci int bits; 47468c2ecf20Sopenharmony_ci 47478c2ecf20Sopenharmony_ci bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p)); 47488c2ecf20Sopenharmony_ci 47498c2ecf20Sopenharmony_ci bits = bitstream_get_bits(&bs, &look_ahead, 64); 47508c2ecf20Sopenharmony_ci if (bits < 0) 47518c2ecf20Sopenharmony_ci return -EIO; 47528c2ecf20Sopenharmony_ci 47538c2ecf20Sopenharmony_ci for (have = bits; have > 0; s += rl, toggle = !toggle) { 47548c2ecf20Sopenharmony_ci bits = vli_decode_bits(&rl, look_ahead); 47558c2ecf20Sopenharmony_ci if (bits <= 0) 47568c2ecf20Sopenharmony_ci return -EIO; 47578c2ecf20Sopenharmony_ci 47588c2ecf20Sopenharmony_ci if (toggle) { 47598c2ecf20Sopenharmony_ci e = s + rl -1; 47608c2ecf20Sopenharmony_ci if (e >= c->bm_bits) { 47618c2ecf20Sopenharmony_ci drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); 47628c2ecf20Sopenharmony_ci return -EIO; 47638c2ecf20Sopenharmony_ci } 47648c2ecf20Sopenharmony_ci _drbd_bm_set_bits(peer_device->device, s, e); 47658c2ecf20Sopenharmony_ci } 47668c2ecf20Sopenharmony_ci 47678c2ecf20Sopenharmony_ci if (have < bits) { 47688c2ecf20Sopenharmony_ci drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", 47698c2ecf20Sopenharmony_ci have, bits, look_ahead, 
47708c2ecf20Sopenharmony_ci (unsigned int)(bs.cur.b - p->code), 47718c2ecf20Sopenharmony_ci (unsigned int)bs.buf_len); 47728c2ecf20Sopenharmony_ci return -EIO; 47738c2ecf20Sopenharmony_ci } 47748c2ecf20Sopenharmony_ci /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */ 47758c2ecf20Sopenharmony_ci if (likely(bits < 64)) 47768c2ecf20Sopenharmony_ci look_ahead >>= bits; 47778c2ecf20Sopenharmony_ci else 47788c2ecf20Sopenharmony_ci look_ahead = 0; 47798c2ecf20Sopenharmony_ci have -= bits; 47808c2ecf20Sopenharmony_ci 47818c2ecf20Sopenharmony_ci bits = bitstream_get_bits(&bs, &tmp, 64 - have); 47828c2ecf20Sopenharmony_ci if (bits < 0) 47838c2ecf20Sopenharmony_ci return -EIO; 47848c2ecf20Sopenharmony_ci look_ahead |= tmp << have; 47858c2ecf20Sopenharmony_ci have += bits; 47868c2ecf20Sopenharmony_ci } 47878c2ecf20Sopenharmony_ci 47888c2ecf20Sopenharmony_ci c->bit_offset = s; 47898c2ecf20Sopenharmony_ci bm_xfer_ctx_bit_to_word_offset(c); 47908c2ecf20Sopenharmony_ci 47918c2ecf20Sopenharmony_ci return (s != c->bm_bits); 47928c2ecf20Sopenharmony_ci} 47938c2ecf20Sopenharmony_ci 47948c2ecf20Sopenharmony_ci/** 47958c2ecf20Sopenharmony_ci * decode_bitmap_c 47968c2ecf20Sopenharmony_ci * 47978c2ecf20Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error 47988c2ecf20Sopenharmony_ci * code upon failure. 
47998c2ecf20Sopenharmony_ci */ 48008c2ecf20Sopenharmony_cistatic int 48018c2ecf20Sopenharmony_cidecode_bitmap_c(struct drbd_peer_device *peer_device, 48028c2ecf20Sopenharmony_ci struct p_compressed_bm *p, 48038c2ecf20Sopenharmony_ci struct bm_xfer_ctx *c, 48048c2ecf20Sopenharmony_ci unsigned int len) 48058c2ecf20Sopenharmony_ci{ 48068c2ecf20Sopenharmony_ci if (dcbp_get_code(p) == RLE_VLI_Bits) 48078c2ecf20Sopenharmony_ci return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p)); 48088c2ecf20Sopenharmony_ci 48098c2ecf20Sopenharmony_ci /* other variants had been implemented for evaluation, 48108c2ecf20Sopenharmony_ci * but have been dropped as this one turned out to be "best" 48118c2ecf20Sopenharmony_ci * during all our tests. */ 48128c2ecf20Sopenharmony_ci 48138c2ecf20Sopenharmony_ci drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding); 48148c2ecf20Sopenharmony_ci conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 48158c2ecf20Sopenharmony_ci return -EIO; 48168c2ecf20Sopenharmony_ci} 48178c2ecf20Sopenharmony_ci 48188c2ecf20Sopenharmony_civoid INFO_bm_xfer_stats(struct drbd_device *device, 48198c2ecf20Sopenharmony_ci const char *direction, struct bm_xfer_ctx *c) 48208c2ecf20Sopenharmony_ci{ 48218c2ecf20Sopenharmony_ci /* what would it take to transfer it "plaintext" */ 48228c2ecf20Sopenharmony_ci unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); 48238c2ecf20Sopenharmony_ci unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; 48248c2ecf20Sopenharmony_ci unsigned int plain = 48258c2ecf20Sopenharmony_ci header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + 48268c2ecf20Sopenharmony_ci c->bm_words * sizeof(unsigned long); 48278c2ecf20Sopenharmony_ci unsigned int total = c->bytes[0] + c->bytes[1]; 48288c2ecf20Sopenharmony_ci unsigned int r; 48298c2ecf20Sopenharmony_ci 48308c2ecf20Sopenharmony_ci /* total can not be zero. 
but just in case: */ 48318c2ecf20Sopenharmony_ci if (total == 0) 48328c2ecf20Sopenharmony_ci return; 48338c2ecf20Sopenharmony_ci 48348c2ecf20Sopenharmony_ci /* don't report if not compressed */ 48358c2ecf20Sopenharmony_ci if (total >= plain) 48368c2ecf20Sopenharmony_ci return; 48378c2ecf20Sopenharmony_ci 48388c2ecf20Sopenharmony_ci /* total < plain. check for overflow, still */ 48398c2ecf20Sopenharmony_ci r = (total > UINT_MAX/1000) ? (total / (plain/1000)) 48408c2ecf20Sopenharmony_ci : (1000 * total / plain); 48418c2ecf20Sopenharmony_ci 48428c2ecf20Sopenharmony_ci if (r > 1000) 48438c2ecf20Sopenharmony_ci r = 1000; 48448c2ecf20Sopenharmony_ci 48458c2ecf20Sopenharmony_ci r = 1000 - r; 48468c2ecf20Sopenharmony_ci drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " 48478c2ecf20Sopenharmony_ci "total %u; compression: %u.%u%%\n", 48488c2ecf20Sopenharmony_ci direction, 48498c2ecf20Sopenharmony_ci c->bytes[1], c->packets[1], 48508c2ecf20Sopenharmony_ci c->bytes[0], c->packets[0], 48518c2ecf20Sopenharmony_ci total, r/10, r % 10); 48528c2ecf20Sopenharmony_ci} 48538c2ecf20Sopenharmony_ci 48548c2ecf20Sopenharmony_ci/* Since we are processing the bitfield from lower addresses to higher, 48558c2ecf20Sopenharmony_ci it does not matter if the process it in 32 bit chunks or 64 bit 48568c2ecf20Sopenharmony_ci chunks as long as it is little endian. (Understand it as byte stream, 48578c2ecf20Sopenharmony_ci beginning with the lowest byte...) If we would use big endian 48588c2ecf20Sopenharmony_ci we would need to process it from the highest address to the lowest, 48598c2ecf20Sopenharmony_ci in order to be agnostic to the 32 vs 64 bits issue. 48608c2ecf20Sopenharmony_ci 48618c2ecf20Sopenharmony_ci returns 0 on failure, 1 if we successfully received it. 
*/ 48628c2ecf20Sopenharmony_cistatic int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi) 48638c2ecf20Sopenharmony_ci{ 48648c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 48658c2ecf20Sopenharmony_ci struct drbd_device *device; 48668c2ecf20Sopenharmony_ci struct bm_xfer_ctx c; 48678c2ecf20Sopenharmony_ci int err; 48688c2ecf20Sopenharmony_ci 48698c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 48708c2ecf20Sopenharmony_ci if (!peer_device) 48718c2ecf20Sopenharmony_ci return -EIO; 48728c2ecf20Sopenharmony_ci device = peer_device->device; 48738c2ecf20Sopenharmony_ci 48748c2ecf20Sopenharmony_ci drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED); 48758c2ecf20Sopenharmony_ci /* you are supposed to send additional out-of-sync information 48768c2ecf20Sopenharmony_ci * if you actually set bits during this phase */ 48778c2ecf20Sopenharmony_ci 48788c2ecf20Sopenharmony_ci c = (struct bm_xfer_ctx) { 48798c2ecf20Sopenharmony_ci .bm_bits = drbd_bm_bits(device), 48808c2ecf20Sopenharmony_ci .bm_words = drbd_bm_words(device), 48818c2ecf20Sopenharmony_ci }; 48828c2ecf20Sopenharmony_ci 48838c2ecf20Sopenharmony_ci for(;;) { 48848c2ecf20Sopenharmony_ci if (pi->cmd == P_BITMAP) 48858c2ecf20Sopenharmony_ci err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c); 48868c2ecf20Sopenharmony_ci else if (pi->cmd == P_COMPRESSED_BITMAP) { 48878c2ecf20Sopenharmony_ci /* MAYBE: sanity check that we speak proto >= 90, 48888c2ecf20Sopenharmony_ci * and the feature is enabled! 
*/ 48898c2ecf20Sopenharmony_ci struct p_compressed_bm *p = pi->data; 48908c2ecf20Sopenharmony_ci 48918c2ecf20Sopenharmony_ci if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) { 48928c2ecf20Sopenharmony_ci drbd_err(device, "ReportCBitmap packet too large\n"); 48938c2ecf20Sopenharmony_ci err = -EIO; 48948c2ecf20Sopenharmony_ci goto out; 48958c2ecf20Sopenharmony_ci } 48968c2ecf20Sopenharmony_ci if (pi->size <= sizeof(*p)) { 48978c2ecf20Sopenharmony_ci drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size); 48988c2ecf20Sopenharmony_ci err = -EIO; 48998c2ecf20Sopenharmony_ci goto out; 49008c2ecf20Sopenharmony_ci } 49018c2ecf20Sopenharmony_ci err = drbd_recv_all(peer_device->connection, p, pi->size); 49028c2ecf20Sopenharmony_ci if (err) 49038c2ecf20Sopenharmony_ci goto out; 49048c2ecf20Sopenharmony_ci err = decode_bitmap_c(peer_device, p, &c, pi->size); 49058c2ecf20Sopenharmony_ci } else { 49068c2ecf20Sopenharmony_ci drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); 49078c2ecf20Sopenharmony_ci err = -EIO; 49088c2ecf20Sopenharmony_ci goto out; 49098c2ecf20Sopenharmony_ci } 49108c2ecf20Sopenharmony_ci 49118c2ecf20Sopenharmony_ci c.packets[pi->cmd == P_BITMAP]++; 49128c2ecf20Sopenharmony_ci c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size; 49138c2ecf20Sopenharmony_ci 49148c2ecf20Sopenharmony_ci if (err <= 0) { 49158c2ecf20Sopenharmony_ci if (err < 0) 49168c2ecf20Sopenharmony_ci goto out; 49178c2ecf20Sopenharmony_ci break; 49188c2ecf20Sopenharmony_ci } 49198c2ecf20Sopenharmony_ci err = drbd_recv_header(peer_device->connection, pi); 49208c2ecf20Sopenharmony_ci if (err) 49218c2ecf20Sopenharmony_ci goto out; 49228c2ecf20Sopenharmony_ci } 49238c2ecf20Sopenharmony_ci 49248c2ecf20Sopenharmony_ci INFO_bm_xfer_stats(device, "receive", &c); 49258c2ecf20Sopenharmony_ci 49268c2ecf20Sopenharmony_ci if (device->state.conn == C_WF_BITMAP_T) { 49278c2ecf20Sopenharmony_ci enum 
drbd_state_rv rv; 49288c2ecf20Sopenharmony_ci 49298c2ecf20Sopenharmony_ci err = drbd_send_bitmap(device); 49308c2ecf20Sopenharmony_ci if (err) 49318c2ecf20Sopenharmony_ci goto out; 49328c2ecf20Sopenharmony_ci /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ 49338c2ecf20Sopenharmony_ci rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); 49348c2ecf20Sopenharmony_ci D_ASSERT(device, rv == SS_SUCCESS); 49358c2ecf20Sopenharmony_ci } else if (device->state.conn != C_WF_BITMAP_S) { 49368c2ecf20Sopenharmony_ci /* admin may have requested C_DISCONNECTING, 49378c2ecf20Sopenharmony_ci * other threads may have noticed network errors */ 49388c2ecf20Sopenharmony_ci drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n", 49398c2ecf20Sopenharmony_ci drbd_conn_str(device->state.conn)); 49408c2ecf20Sopenharmony_ci } 49418c2ecf20Sopenharmony_ci err = 0; 49428c2ecf20Sopenharmony_ci 49438c2ecf20Sopenharmony_ci out: 49448c2ecf20Sopenharmony_ci drbd_bm_unlock(device); 49458c2ecf20Sopenharmony_ci if (!err && device->state.conn == C_WF_BITMAP_S) 49468c2ecf20Sopenharmony_ci drbd_start_resync(device, C_SYNC_SOURCE); 49478c2ecf20Sopenharmony_ci return err; 49488c2ecf20Sopenharmony_ci} 49498c2ecf20Sopenharmony_ci 49508c2ecf20Sopenharmony_cistatic int receive_skip(struct drbd_connection *connection, struct packet_info *pi) 49518c2ecf20Sopenharmony_ci{ 49528c2ecf20Sopenharmony_ci drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n", 49538c2ecf20Sopenharmony_ci pi->cmd, pi->size); 49548c2ecf20Sopenharmony_ci 49558c2ecf20Sopenharmony_ci return ignore_remaining_packet(connection, pi); 49568c2ecf20Sopenharmony_ci} 49578c2ecf20Sopenharmony_ci 49588c2ecf20Sopenharmony_cistatic int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi) 49598c2ecf20Sopenharmony_ci{ 49608c2ecf20Sopenharmony_ci /* Make sure we've acked all the TCP data associated 49618c2ecf20Sopenharmony_ci * with the data requests being 
unplugged */ 49628c2ecf20Sopenharmony_ci tcp_sock_set_quickack(connection->data.socket->sk, 2); 49638c2ecf20Sopenharmony_ci return 0; 49648c2ecf20Sopenharmony_ci} 49658c2ecf20Sopenharmony_ci 49668c2ecf20Sopenharmony_cistatic int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi) 49678c2ecf20Sopenharmony_ci{ 49688c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 49698c2ecf20Sopenharmony_ci struct drbd_device *device; 49708c2ecf20Sopenharmony_ci struct p_block_desc *p = pi->data; 49718c2ecf20Sopenharmony_ci 49728c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 49738c2ecf20Sopenharmony_ci if (!peer_device) 49748c2ecf20Sopenharmony_ci return -EIO; 49758c2ecf20Sopenharmony_ci device = peer_device->device; 49768c2ecf20Sopenharmony_ci 49778c2ecf20Sopenharmony_ci switch (device->state.conn) { 49788c2ecf20Sopenharmony_ci case C_WF_SYNC_UUID: 49798c2ecf20Sopenharmony_ci case C_WF_BITMAP_T: 49808c2ecf20Sopenharmony_ci case C_BEHIND: 49818c2ecf20Sopenharmony_ci break; 49828c2ecf20Sopenharmony_ci default: 49838c2ecf20Sopenharmony_ci drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", 49848c2ecf20Sopenharmony_ci drbd_conn_str(device->state.conn)); 49858c2ecf20Sopenharmony_ci } 49868c2ecf20Sopenharmony_ci 49878c2ecf20Sopenharmony_ci drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); 49888c2ecf20Sopenharmony_ci 49898c2ecf20Sopenharmony_ci return 0; 49908c2ecf20Sopenharmony_ci} 49918c2ecf20Sopenharmony_ci 49928c2ecf20Sopenharmony_cistatic int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi) 49938c2ecf20Sopenharmony_ci{ 49948c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 49958c2ecf20Sopenharmony_ci struct p_block_desc *p = pi->data; 49968c2ecf20Sopenharmony_ci struct drbd_device *device; 49978c2ecf20Sopenharmony_ci sector_t sector; 49988c2ecf20Sopenharmony_ci int size, err = 0; 49998c2ecf20Sopenharmony_ci 
50008c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 50018c2ecf20Sopenharmony_ci if (!peer_device) 50028c2ecf20Sopenharmony_ci return -EIO; 50038c2ecf20Sopenharmony_ci device = peer_device->device; 50048c2ecf20Sopenharmony_ci 50058c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 50068c2ecf20Sopenharmony_ci size = be32_to_cpu(p->blksize); 50078c2ecf20Sopenharmony_ci 50088c2ecf20Sopenharmony_ci dec_rs_pending(device); 50098c2ecf20Sopenharmony_ci 50108c2ecf20Sopenharmony_ci if (get_ldev(device)) { 50118c2ecf20Sopenharmony_ci struct drbd_peer_request *peer_req; 50128c2ecf20Sopenharmony_ci const int op = REQ_OP_WRITE_ZEROES; 50138c2ecf20Sopenharmony_ci 50148c2ecf20Sopenharmony_ci peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, 50158c2ecf20Sopenharmony_ci size, 0, GFP_NOIO); 50168c2ecf20Sopenharmony_ci if (!peer_req) { 50178c2ecf20Sopenharmony_ci put_ldev(device); 50188c2ecf20Sopenharmony_ci return -ENOMEM; 50198c2ecf20Sopenharmony_ci } 50208c2ecf20Sopenharmony_ci 50218c2ecf20Sopenharmony_ci peer_req->w.cb = e_end_resync_block; 50228c2ecf20Sopenharmony_ci peer_req->submit_jif = jiffies; 50238c2ecf20Sopenharmony_ci peer_req->flags |= EE_TRIM; 50248c2ecf20Sopenharmony_ci 50258c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 50268c2ecf20Sopenharmony_ci list_add_tail(&peer_req->w.list, &device->sync_ee); 50278c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 50288c2ecf20Sopenharmony_ci 50298c2ecf20Sopenharmony_ci atomic_add(pi->size >> 9, &device->rs_sect_ev); 50308c2ecf20Sopenharmony_ci err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR); 50318c2ecf20Sopenharmony_ci 50328c2ecf20Sopenharmony_ci if (err) { 50338c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 50348c2ecf20Sopenharmony_ci list_del(&peer_req->w.list); 50358c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 50368c2ecf20Sopenharmony_ci 50378c2ecf20Sopenharmony_ci 
drbd_free_peer_req(device, peer_req); 50388c2ecf20Sopenharmony_ci put_ldev(device); 50398c2ecf20Sopenharmony_ci err = 0; 50408c2ecf20Sopenharmony_ci goto fail; 50418c2ecf20Sopenharmony_ci } 50428c2ecf20Sopenharmony_ci 50438c2ecf20Sopenharmony_ci inc_unacked(device); 50448c2ecf20Sopenharmony_ci 50458c2ecf20Sopenharmony_ci /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(), 50468c2ecf20Sopenharmony_ci as well as drbd_rs_complete_io() */ 50478c2ecf20Sopenharmony_ci } else { 50488c2ecf20Sopenharmony_ci fail: 50498c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 50508c2ecf20Sopenharmony_ci drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); 50518c2ecf20Sopenharmony_ci } 50528c2ecf20Sopenharmony_ci 50538c2ecf20Sopenharmony_ci atomic_add(size >> 9, &device->rs_sect_in); 50548c2ecf20Sopenharmony_ci 50558c2ecf20Sopenharmony_ci return err; 50568c2ecf20Sopenharmony_ci} 50578c2ecf20Sopenharmony_ci 50588c2ecf20Sopenharmony_cistruct data_cmd { 50598c2ecf20Sopenharmony_ci int expect_payload; 50608c2ecf20Sopenharmony_ci unsigned int pkt_size; 50618c2ecf20Sopenharmony_ci int (*fn)(struct drbd_connection *, struct packet_info *); 50628c2ecf20Sopenharmony_ci}; 50638c2ecf20Sopenharmony_ci 50648c2ecf20Sopenharmony_cistatic struct data_cmd drbd_cmd_handler[] = { 50658c2ecf20Sopenharmony_ci [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, 50668c2ecf20Sopenharmony_ci [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, 50678c2ecf20Sopenharmony_ci [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , 50688c2ecf20Sopenharmony_ci [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , 50698c2ecf20Sopenharmony_ci [P_BITMAP] = { 1, 0, receive_bitmap } , 50708c2ecf20Sopenharmony_ci [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } , 50718c2ecf20Sopenharmony_ci [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote }, 50728c2ecf20Sopenharmony_ci [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest 
}, 50738c2ecf20Sopenharmony_ci [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 50748c2ecf20Sopenharmony_ci [P_SYNC_PARAM] = { 1, 0, receive_SyncParam }, 50758c2ecf20Sopenharmony_ci [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam }, 50768c2ecf20Sopenharmony_ci [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, 50778c2ecf20Sopenharmony_ci [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, 50788c2ecf20Sopenharmony_ci [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, 50798c2ecf20Sopenharmony_ci [P_STATE] = { 0, sizeof(struct p_state), receive_state }, 50808c2ecf20Sopenharmony_ci [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, 50818c2ecf20Sopenharmony_ci [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, 50828c2ecf20Sopenharmony_ci [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 50838c2ecf20Sopenharmony_ci [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 50848c2ecf20Sopenharmony_ci [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 50858c2ecf20Sopenharmony_ci [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 50868c2ecf20Sopenharmony_ci [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, 50878c2ecf20Sopenharmony_ci [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, 50888c2ecf20Sopenharmony_ci [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, 50898c2ecf20Sopenharmony_ci [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, 50908c2ecf20Sopenharmony_ci [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, 50918c2ecf20Sopenharmony_ci [P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data }, 50928c2ecf20Sopenharmony_ci [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, 50938c2ecf20Sopenharmony_ci [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, 
50948c2ecf20Sopenharmony_ci}; 50958c2ecf20Sopenharmony_ci 50968c2ecf20Sopenharmony_cistatic void drbdd(struct drbd_connection *connection) 50978c2ecf20Sopenharmony_ci{ 50988c2ecf20Sopenharmony_ci struct packet_info pi; 50998c2ecf20Sopenharmony_ci size_t shs; /* sub header size */ 51008c2ecf20Sopenharmony_ci int err; 51018c2ecf20Sopenharmony_ci 51028c2ecf20Sopenharmony_ci while (get_t_state(&connection->receiver) == RUNNING) { 51038c2ecf20Sopenharmony_ci struct data_cmd const *cmd; 51048c2ecf20Sopenharmony_ci 51058c2ecf20Sopenharmony_ci drbd_thread_current_set_cpu(&connection->receiver); 51068c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug); 51078c2ecf20Sopenharmony_ci if (drbd_recv_header_maybe_unplug(connection, &pi)) 51088c2ecf20Sopenharmony_ci goto err_out; 51098c2ecf20Sopenharmony_ci 51108c2ecf20Sopenharmony_ci cmd = &drbd_cmd_handler[pi.cmd]; 51118c2ecf20Sopenharmony_ci if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { 51128c2ecf20Sopenharmony_ci drbd_err(connection, "Unexpected data packet %s (0x%04x)", 51138c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 51148c2ecf20Sopenharmony_ci goto err_out; 51158c2ecf20Sopenharmony_ci } 51168c2ecf20Sopenharmony_ci 51178c2ecf20Sopenharmony_ci shs = cmd->pkt_size; 51188c2ecf20Sopenharmony_ci if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME) 51198c2ecf20Sopenharmony_ci shs += sizeof(struct o_qlim); 51208c2ecf20Sopenharmony_ci if (pi.size > shs && !cmd->expect_payload) { 51218c2ecf20Sopenharmony_ci drbd_err(connection, "No payload expected %s l:%d\n", 51228c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.size); 51238c2ecf20Sopenharmony_ci goto err_out; 51248c2ecf20Sopenharmony_ci } 51258c2ecf20Sopenharmony_ci if (pi.size < shs) { 51268c2ecf20Sopenharmony_ci drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n", 51278c2ecf20Sopenharmony_ci cmdname(pi.cmd), (int)shs, pi.size); 51288c2ecf20Sopenharmony_ci goto err_out; 
51298c2ecf20Sopenharmony_ci } 51308c2ecf20Sopenharmony_ci 51318c2ecf20Sopenharmony_ci if (shs) { 51328c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, drbd_recv_all_warn); 51338c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(connection, pi.data, shs); 51348c2ecf20Sopenharmony_ci if (err) 51358c2ecf20Sopenharmony_ci goto err_out; 51368c2ecf20Sopenharmony_ci pi.size -= shs; 51378c2ecf20Sopenharmony_ci } 51388c2ecf20Sopenharmony_ci 51398c2ecf20Sopenharmony_ci update_receiver_timing_details(connection, cmd->fn); 51408c2ecf20Sopenharmony_ci err = cmd->fn(connection, &pi); 51418c2ecf20Sopenharmony_ci if (err) { 51428c2ecf20Sopenharmony_ci drbd_err(connection, "error receiving %s, e: %d l: %d!\n", 51438c2ecf20Sopenharmony_ci cmdname(pi.cmd), err, pi.size); 51448c2ecf20Sopenharmony_ci goto err_out; 51458c2ecf20Sopenharmony_ci } 51468c2ecf20Sopenharmony_ci } 51478c2ecf20Sopenharmony_ci return; 51488c2ecf20Sopenharmony_ci 51498c2ecf20Sopenharmony_ci err_out: 51508c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 51518c2ecf20Sopenharmony_ci} 51528c2ecf20Sopenharmony_ci 51538c2ecf20Sopenharmony_cistatic void conn_disconnect(struct drbd_connection *connection) 51548c2ecf20Sopenharmony_ci{ 51558c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 51568c2ecf20Sopenharmony_ci enum drbd_conns oc; 51578c2ecf20Sopenharmony_ci int vnr; 51588c2ecf20Sopenharmony_ci 51598c2ecf20Sopenharmony_ci if (connection->cstate == C_STANDALONE) 51608c2ecf20Sopenharmony_ci return; 51618c2ecf20Sopenharmony_ci 51628c2ecf20Sopenharmony_ci /* We are about to start the cleanup after connection loss. 51638c2ecf20Sopenharmony_ci * Make sure drbd_make_request knows about that. 51648c2ecf20Sopenharmony_ci * Usually we should be in some network failure state already, 51658c2ecf20Sopenharmony_ci * but just in case we are not, we fix it up here. 
51668c2ecf20Sopenharmony_ci */ 51678c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 51688c2ecf20Sopenharmony_ci 51698c2ecf20Sopenharmony_ci /* ack_receiver does not clean up anything. it must not interfere, either */ 51708c2ecf20Sopenharmony_ci drbd_thread_stop(&connection->ack_receiver); 51718c2ecf20Sopenharmony_ci if (connection->ack_sender) { 51728c2ecf20Sopenharmony_ci destroy_workqueue(connection->ack_sender); 51738c2ecf20Sopenharmony_ci connection->ack_sender = NULL; 51748c2ecf20Sopenharmony_ci } 51758c2ecf20Sopenharmony_ci drbd_free_sock(connection); 51768c2ecf20Sopenharmony_ci 51778c2ecf20Sopenharmony_ci rcu_read_lock(); 51788c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 51798c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 51808c2ecf20Sopenharmony_ci kref_get(&device->kref); 51818c2ecf20Sopenharmony_ci rcu_read_unlock(); 51828c2ecf20Sopenharmony_ci drbd_disconnected(peer_device); 51838c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 51848c2ecf20Sopenharmony_ci rcu_read_lock(); 51858c2ecf20Sopenharmony_ci } 51868c2ecf20Sopenharmony_ci rcu_read_unlock(); 51878c2ecf20Sopenharmony_ci 51888c2ecf20Sopenharmony_ci if (!list_empty(&connection->current_epoch->list)) 51898c2ecf20Sopenharmony_ci drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n"); 51908c2ecf20Sopenharmony_ci /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ 51918c2ecf20Sopenharmony_ci atomic_set(&connection->current_epoch->epoch_size, 0); 51928c2ecf20Sopenharmony_ci connection->send.seen_any_write_yet = false; 51938c2ecf20Sopenharmony_ci 51948c2ecf20Sopenharmony_ci drbd_info(connection, "Connection closed\n"); 51958c2ecf20Sopenharmony_ci 51968c2ecf20Sopenharmony_ci if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN) 51978c2ecf20Sopenharmony_ci 
conn_try_outdate_peer_async(connection); 51988c2ecf20Sopenharmony_ci 51998c2ecf20Sopenharmony_ci spin_lock_irq(&connection->resource->req_lock); 52008c2ecf20Sopenharmony_ci oc = connection->cstate; 52018c2ecf20Sopenharmony_ci if (oc >= C_UNCONNECTED) 52028c2ecf20Sopenharmony_ci _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 52038c2ecf20Sopenharmony_ci 52048c2ecf20Sopenharmony_ci spin_unlock_irq(&connection->resource->req_lock); 52058c2ecf20Sopenharmony_ci 52068c2ecf20Sopenharmony_ci if (oc == C_DISCONNECTING) 52078c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); 52088c2ecf20Sopenharmony_ci} 52098c2ecf20Sopenharmony_ci 52108c2ecf20Sopenharmony_cistatic int drbd_disconnected(struct drbd_peer_device *peer_device) 52118c2ecf20Sopenharmony_ci{ 52128c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 52138c2ecf20Sopenharmony_ci unsigned int i; 52148c2ecf20Sopenharmony_ci 52158c2ecf20Sopenharmony_ci /* wait for current activity to cease. */ 52168c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 52178c2ecf20Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->active_ee); 52188c2ecf20Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->sync_ee); 52198c2ecf20Sopenharmony_ci _drbd_wait_ee_list_empty(device, &device->read_ee); 52208c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 52218c2ecf20Sopenharmony_ci 52228c2ecf20Sopenharmony_ci /* We do not have data structures that would allow us to 52238c2ecf20Sopenharmony_ci * get the rs_pending_cnt down to 0 again. 52248c2ecf20Sopenharmony_ci * * On C_SYNC_TARGET we do not have any data structures describing 52258c2ecf20Sopenharmony_ci * the pending RSDataRequest's we have sent. 52268c2ecf20Sopenharmony_ci * * On C_SYNC_SOURCE there is no data structure that tracks 52278c2ecf20Sopenharmony_ci * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. 
52288c2ecf20Sopenharmony_ci * And no, it is not the sum of the reference counts in the 52298c2ecf20Sopenharmony_ci * resync_LRU. The resync_LRU tracks the whole operation including 52308c2ecf20Sopenharmony_ci * the disk-IO, while the rs_pending_cnt only tracks the blocks 52318c2ecf20Sopenharmony_ci * on the fly. */ 52328c2ecf20Sopenharmony_ci drbd_rs_cancel_all(device); 52338c2ecf20Sopenharmony_ci device->rs_total = 0; 52348c2ecf20Sopenharmony_ci device->rs_failed = 0; 52358c2ecf20Sopenharmony_ci atomic_set(&device->rs_pending_cnt, 0); 52368c2ecf20Sopenharmony_ci wake_up(&device->misc_wait); 52378c2ecf20Sopenharmony_ci 52388c2ecf20Sopenharmony_ci del_timer_sync(&device->resync_timer); 52398c2ecf20Sopenharmony_ci resync_timer_fn(&device->resync_timer); 52408c2ecf20Sopenharmony_ci 52418c2ecf20Sopenharmony_ci /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 52428c2ecf20Sopenharmony_ci * w_make_resync_request etc. which may still be on the worker queue 52438c2ecf20Sopenharmony_ci * to be "canceled" */ 52448c2ecf20Sopenharmony_ci drbd_flush_workqueue(&peer_device->connection->sender_work); 52458c2ecf20Sopenharmony_ci 52468c2ecf20Sopenharmony_ci drbd_finish_peer_reqs(device); 52478c2ecf20Sopenharmony_ci 52488c2ecf20Sopenharmony_ci /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() 52498c2ecf20Sopenharmony_ci might have issued a work again. The one before drbd_finish_peer_reqs() is 52508c2ecf20Sopenharmony_ci necessary to reclain net_ee in drbd_finish_peer_reqs(). */ 52518c2ecf20Sopenharmony_ci drbd_flush_workqueue(&peer_device->connection->sender_work); 52528c2ecf20Sopenharmony_ci 52538c2ecf20Sopenharmony_ci /* need to do it again, drbd_finish_peer_reqs() may have populated it 52548c2ecf20Sopenharmony_ci * again via drbd_try_clear_on_disk_bm(). 
*/ 52558c2ecf20Sopenharmony_ci drbd_rs_cancel_all(device); 52568c2ecf20Sopenharmony_ci 52578c2ecf20Sopenharmony_ci kfree(device->p_uuid); 52588c2ecf20Sopenharmony_ci device->p_uuid = NULL; 52598c2ecf20Sopenharmony_ci 52608c2ecf20Sopenharmony_ci if (!drbd_suspended(device)) 52618c2ecf20Sopenharmony_ci tl_clear(peer_device->connection); 52628c2ecf20Sopenharmony_ci 52638c2ecf20Sopenharmony_ci drbd_md_sync(device); 52648c2ecf20Sopenharmony_ci 52658c2ecf20Sopenharmony_ci if (get_ldev(device)) { 52668c2ecf20Sopenharmony_ci drbd_bitmap_io(device, &drbd_bm_write_copy_pages, 52678c2ecf20Sopenharmony_ci "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); 52688c2ecf20Sopenharmony_ci put_ldev(device); 52698c2ecf20Sopenharmony_ci } 52708c2ecf20Sopenharmony_ci 52718c2ecf20Sopenharmony_ci /* tcp_close and release of sendpage pages can be deferred. I don't 52728c2ecf20Sopenharmony_ci * want to use SO_LINGER, because apparently it can be deferred for 52738c2ecf20Sopenharmony_ci * more than 20 seconds (longest time I checked). 52748c2ecf20Sopenharmony_ci * 52758c2ecf20Sopenharmony_ci * Actually we don't care for exactly when the network stack does its 52768c2ecf20Sopenharmony_ci * put_page(), but release our reference on these pages right here. 
52778c2ecf20Sopenharmony_ci */ 52788c2ecf20Sopenharmony_ci i = drbd_free_peer_reqs(device, &device->net_ee); 52798c2ecf20Sopenharmony_ci if (i) 52808c2ecf20Sopenharmony_ci drbd_info(device, "net_ee not empty, killed %u entries\n", i); 52818c2ecf20Sopenharmony_ci i = atomic_read(&device->pp_in_use_by_net); 52828c2ecf20Sopenharmony_ci if (i) 52838c2ecf20Sopenharmony_ci drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); 52848c2ecf20Sopenharmony_ci i = atomic_read(&device->pp_in_use); 52858c2ecf20Sopenharmony_ci if (i) 52868c2ecf20Sopenharmony_ci drbd_info(device, "pp_in_use = %d, expected 0\n", i); 52878c2ecf20Sopenharmony_ci 52888c2ecf20Sopenharmony_ci D_ASSERT(device, list_empty(&device->read_ee)); 52898c2ecf20Sopenharmony_ci D_ASSERT(device, list_empty(&device->active_ee)); 52908c2ecf20Sopenharmony_ci D_ASSERT(device, list_empty(&device->sync_ee)); 52918c2ecf20Sopenharmony_ci D_ASSERT(device, list_empty(&device->done_ee)); 52928c2ecf20Sopenharmony_ci 52938c2ecf20Sopenharmony_ci return 0; 52948c2ecf20Sopenharmony_ci} 52958c2ecf20Sopenharmony_ci 52968c2ecf20Sopenharmony_ci/* 52978c2ecf20Sopenharmony_ci * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version 52988c2ecf20Sopenharmony_ci * we can agree on is stored in agreed_pro_version. 52998c2ecf20Sopenharmony_ci * 53008c2ecf20Sopenharmony_ci * feature flags and the reserved array should be enough room for future 53018c2ecf20Sopenharmony_ci * enhancements of the handshake protocol, and possible plugins... 53028c2ecf20Sopenharmony_ci * 53038c2ecf20Sopenharmony_ci * for now, they are expected to be zero, but ignored. 
53048c2ecf20Sopenharmony_ci */ 53058c2ecf20Sopenharmony_cistatic int drbd_send_features(struct drbd_connection *connection) 53068c2ecf20Sopenharmony_ci{ 53078c2ecf20Sopenharmony_ci struct drbd_socket *sock; 53088c2ecf20Sopenharmony_ci struct p_connection_features *p; 53098c2ecf20Sopenharmony_ci 53108c2ecf20Sopenharmony_ci sock = &connection->data; 53118c2ecf20Sopenharmony_ci p = conn_prepare_command(connection, sock); 53128c2ecf20Sopenharmony_ci if (!p) 53138c2ecf20Sopenharmony_ci return -EIO; 53148c2ecf20Sopenharmony_ci memset(p, 0, sizeof(*p)); 53158c2ecf20Sopenharmony_ci p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 53168c2ecf20Sopenharmony_ci p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 53178c2ecf20Sopenharmony_ci p->feature_flags = cpu_to_be32(PRO_FEATURES); 53188c2ecf20Sopenharmony_ci return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); 53198c2ecf20Sopenharmony_ci} 53208c2ecf20Sopenharmony_ci 53218c2ecf20Sopenharmony_ci/* 53228c2ecf20Sopenharmony_ci * return values: 53238c2ecf20Sopenharmony_ci * 1 yes, we have a valid connection 53248c2ecf20Sopenharmony_ci * 0 oops, did not work out, please try again 53258c2ecf20Sopenharmony_ci * -1 peer talks different language, 53268c2ecf20Sopenharmony_ci * no point in trying again, please go standalone. 53278c2ecf20Sopenharmony_ci */ 53288c2ecf20Sopenharmony_cistatic int drbd_do_features(struct drbd_connection *connection) 53298c2ecf20Sopenharmony_ci{ 53308c2ecf20Sopenharmony_ci /* ASSERT current == connection->receiver ... 
*/ 53318c2ecf20Sopenharmony_ci struct p_connection_features *p; 53328c2ecf20Sopenharmony_ci const int expect = sizeof(struct p_connection_features); 53338c2ecf20Sopenharmony_ci struct packet_info pi; 53348c2ecf20Sopenharmony_ci int err; 53358c2ecf20Sopenharmony_ci 53368c2ecf20Sopenharmony_ci err = drbd_send_features(connection); 53378c2ecf20Sopenharmony_ci if (err) 53388c2ecf20Sopenharmony_ci return 0; 53398c2ecf20Sopenharmony_ci 53408c2ecf20Sopenharmony_ci err = drbd_recv_header(connection, &pi); 53418c2ecf20Sopenharmony_ci if (err) 53428c2ecf20Sopenharmony_ci return 0; 53438c2ecf20Sopenharmony_ci 53448c2ecf20Sopenharmony_ci if (pi.cmd != P_CONNECTION_FEATURES) { 53458c2ecf20Sopenharmony_ci drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", 53468c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 53478c2ecf20Sopenharmony_ci return -1; 53488c2ecf20Sopenharmony_ci } 53498c2ecf20Sopenharmony_ci 53508c2ecf20Sopenharmony_ci if (pi.size != expect) { 53518c2ecf20Sopenharmony_ci drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n", 53528c2ecf20Sopenharmony_ci expect, pi.size); 53538c2ecf20Sopenharmony_ci return -1; 53548c2ecf20Sopenharmony_ci } 53558c2ecf20Sopenharmony_ci 53568c2ecf20Sopenharmony_ci p = pi.data; 53578c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(connection, p, expect); 53588c2ecf20Sopenharmony_ci if (err) 53598c2ecf20Sopenharmony_ci return 0; 53608c2ecf20Sopenharmony_ci 53618c2ecf20Sopenharmony_ci p->protocol_min = be32_to_cpu(p->protocol_min); 53628c2ecf20Sopenharmony_ci p->protocol_max = be32_to_cpu(p->protocol_max); 53638c2ecf20Sopenharmony_ci if (p->protocol_max == 0) 53648c2ecf20Sopenharmony_ci p->protocol_max = p->protocol_min; 53658c2ecf20Sopenharmony_ci 53668c2ecf20Sopenharmony_ci if (PRO_VERSION_MAX < p->protocol_min || 53678c2ecf20Sopenharmony_ci PRO_VERSION_MIN > p->protocol_max) 53688c2ecf20Sopenharmony_ci goto incompat; 53698c2ecf20Sopenharmony_ci 53708c2ecf20Sopenharmony_ci 
connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); 53718c2ecf20Sopenharmony_ci connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags); 53728c2ecf20Sopenharmony_ci 53738c2ecf20Sopenharmony_ci drbd_info(connection, "Handshake successful: " 53748c2ecf20Sopenharmony_ci "Agreed network protocol version %d\n", connection->agreed_pro_version); 53758c2ecf20Sopenharmony_ci 53768c2ecf20Sopenharmony_ci drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n", 53778c2ecf20Sopenharmony_ci connection->agreed_features, 53788c2ecf20Sopenharmony_ci connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", 53798c2ecf20Sopenharmony_ci connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", 53808c2ecf20Sopenharmony_ci connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "", 53818c2ecf20Sopenharmony_ci connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" : 53828c2ecf20Sopenharmony_ci connection->agreed_features ? 
"" : " none"); 53838c2ecf20Sopenharmony_ci 53848c2ecf20Sopenharmony_ci return 1; 53858c2ecf20Sopenharmony_ci 53868c2ecf20Sopenharmony_ci incompat: 53878c2ecf20Sopenharmony_ci drbd_err(connection, "incompatible DRBD dialects: " 53888c2ecf20Sopenharmony_ci "I support %d-%d, peer supports %d-%d\n", 53898c2ecf20Sopenharmony_ci PRO_VERSION_MIN, PRO_VERSION_MAX, 53908c2ecf20Sopenharmony_ci p->protocol_min, p->protocol_max); 53918c2ecf20Sopenharmony_ci return -1; 53928c2ecf20Sopenharmony_ci} 53938c2ecf20Sopenharmony_ci 53948c2ecf20Sopenharmony_ci#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) 53958c2ecf20Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection) 53968c2ecf20Sopenharmony_ci{ 53978c2ecf20Sopenharmony_ci drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); 53988c2ecf20Sopenharmony_ci drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); 53998c2ecf20Sopenharmony_ci return -1; 54008c2ecf20Sopenharmony_ci} 54018c2ecf20Sopenharmony_ci#else 54028c2ecf20Sopenharmony_ci#define CHALLENGE_LEN 64 54038c2ecf20Sopenharmony_ci 54048c2ecf20Sopenharmony_ci/* Return value: 54058c2ecf20Sopenharmony_ci 1 - auth succeeded, 54068c2ecf20Sopenharmony_ci 0 - failed, try again (network error), 54078c2ecf20Sopenharmony_ci -1 - auth failed, don't try again. 54088c2ecf20Sopenharmony_ci*/ 54098c2ecf20Sopenharmony_ci 54108c2ecf20Sopenharmony_cistatic int drbd_do_auth(struct drbd_connection *connection) 54118c2ecf20Sopenharmony_ci{ 54128c2ecf20Sopenharmony_ci struct drbd_socket *sock; 54138c2ecf20Sopenharmony_ci char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... 
*/ 54148c2ecf20Sopenharmony_ci char *response = NULL; 54158c2ecf20Sopenharmony_ci char *right_response = NULL; 54168c2ecf20Sopenharmony_ci char *peers_ch = NULL; 54178c2ecf20Sopenharmony_ci unsigned int key_len; 54188c2ecf20Sopenharmony_ci char secret[SHARED_SECRET_MAX]; /* 64 byte */ 54198c2ecf20Sopenharmony_ci unsigned int resp_size; 54208c2ecf20Sopenharmony_ci struct shash_desc *desc; 54218c2ecf20Sopenharmony_ci struct packet_info pi; 54228c2ecf20Sopenharmony_ci struct net_conf *nc; 54238c2ecf20Sopenharmony_ci int err, rv; 54248c2ecf20Sopenharmony_ci 54258c2ecf20Sopenharmony_ci /* FIXME: Put the challenge/response into the preallocated socket buffer. */ 54268c2ecf20Sopenharmony_ci 54278c2ecf20Sopenharmony_ci rcu_read_lock(); 54288c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 54298c2ecf20Sopenharmony_ci key_len = strlen(nc->shared_secret); 54308c2ecf20Sopenharmony_ci memcpy(secret, nc->shared_secret, key_len); 54318c2ecf20Sopenharmony_ci rcu_read_unlock(); 54328c2ecf20Sopenharmony_ci 54338c2ecf20Sopenharmony_ci desc = kmalloc(sizeof(struct shash_desc) + 54348c2ecf20Sopenharmony_ci crypto_shash_descsize(connection->cram_hmac_tfm), 54358c2ecf20Sopenharmony_ci GFP_KERNEL); 54368c2ecf20Sopenharmony_ci if (!desc) { 54378c2ecf20Sopenharmony_ci rv = -1; 54388c2ecf20Sopenharmony_ci goto fail; 54398c2ecf20Sopenharmony_ci } 54408c2ecf20Sopenharmony_ci desc->tfm = connection->cram_hmac_tfm; 54418c2ecf20Sopenharmony_ci 54428c2ecf20Sopenharmony_ci rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); 54438c2ecf20Sopenharmony_ci if (rv) { 54448c2ecf20Sopenharmony_ci drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv); 54458c2ecf20Sopenharmony_ci rv = -1; 54468c2ecf20Sopenharmony_ci goto fail; 54478c2ecf20Sopenharmony_ci } 54488c2ecf20Sopenharmony_ci 54498c2ecf20Sopenharmony_ci get_random_bytes(my_challenge, CHALLENGE_LEN); 54508c2ecf20Sopenharmony_ci 54518c2ecf20Sopenharmony_ci sock = &connection->data; 
54528c2ecf20Sopenharmony_ci if (!conn_prepare_command(connection, sock)) { 54538c2ecf20Sopenharmony_ci rv = 0; 54548c2ecf20Sopenharmony_ci goto fail; 54558c2ecf20Sopenharmony_ci } 54568c2ecf20Sopenharmony_ci rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0, 54578c2ecf20Sopenharmony_ci my_challenge, CHALLENGE_LEN); 54588c2ecf20Sopenharmony_ci if (!rv) 54598c2ecf20Sopenharmony_ci goto fail; 54608c2ecf20Sopenharmony_ci 54618c2ecf20Sopenharmony_ci err = drbd_recv_header(connection, &pi); 54628c2ecf20Sopenharmony_ci if (err) { 54638c2ecf20Sopenharmony_ci rv = 0; 54648c2ecf20Sopenharmony_ci goto fail; 54658c2ecf20Sopenharmony_ci } 54668c2ecf20Sopenharmony_ci 54678c2ecf20Sopenharmony_ci if (pi.cmd != P_AUTH_CHALLENGE) { 54688c2ecf20Sopenharmony_ci drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 54698c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 54708c2ecf20Sopenharmony_ci rv = -1; 54718c2ecf20Sopenharmony_ci goto fail; 54728c2ecf20Sopenharmony_ci } 54738c2ecf20Sopenharmony_ci 54748c2ecf20Sopenharmony_ci if (pi.size > CHALLENGE_LEN * 2) { 54758c2ecf20Sopenharmony_ci drbd_err(connection, "expected AuthChallenge payload too big.\n"); 54768c2ecf20Sopenharmony_ci rv = -1; 54778c2ecf20Sopenharmony_ci goto fail; 54788c2ecf20Sopenharmony_ci } 54798c2ecf20Sopenharmony_ci 54808c2ecf20Sopenharmony_ci if (pi.size < CHALLENGE_LEN) { 54818c2ecf20Sopenharmony_ci drbd_err(connection, "AuthChallenge payload too small.\n"); 54828c2ecf20Sopenharmony_ci rv = -1; 54838c2ecf20Sopenharmony_ci goto fail; 54848c2ecf20Sopenharmony_ci } 54858c2ecf20Sopenharmony_ci 54868c2ecf20Sopenharmony_ci peers_ch = kmalloc(pi.size, GFP_NOIO); 54878c2ecf20Sopenharmony_ci if (peers_ch == NULL) { 54888c2ecf20Sopenharmony_ci drbd_err(connection, "kmalloc of peers_ch failed\n"); 54898c2ecf20Sopenharmony_ci rv = -1; 54908c2ecf20Sopenharmony_ci goto fail; 54918c2ecf20Sopenharmony_ci } 54928c2ecf20Sopenharmony_ci 54938c2ecf20Sopenharmony_ci err = 
drbd_recv_all_warn(connection, peers_ch, pi.size); 54948c2ecf20Sopenharmony_ci if (err) { 54958c2ecf20Sopenharmony_ci rv = 0; 54968c2ecf20Sopenharmony_ci goto fail; 54978c2ecf20Sopenharmony_ci } 54988c2ecf20Sopenharmony_ci 54998c2ecf20Sopenharmony_ci if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) { 55008c2ecf20Sopenharmony_ci drbd_err(connection, "Peer presented the same challenge!\n"); 55018c2ecf20Sopenharmony_ci rv = -1; 55028c2ecf20Sopenharmony_ci goto fail; 55038c2ecf20Sopenharmony_ci } 55048c2ecf20Sopenharmony_ci 55058c2ecf20Sopenharmony_ci resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm); 55068c2ecf20Sopenharmony_ci response = kmalloc(resp_size, GFP_NOIO); 55078c2ecf20Sopenharmony_ci if (response == NULL) { 55088c2ecf20Sopenharmony_ci drbd_err(connection, "kmalloc of response failed\n"); 55098c2ecf20Sopenharmony_ci rv = -1; 55108c2ecf20Sopenharmony_ci goto fail; 55118c2ecf20Sopenharmony_ci } 55128c2ecf20Sopenharmony_ci 55138c2ecf20Sopenharmony_ci rv = crypto_shash_digest(desc, peers_ch, pi.size, response); 55148c2ecf20Sopenharmony_ci if (rv) { 55158c2ecf20Sopenharmony_ci drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 55168c2ecf20Sopenharmony_ci rv = -1; 55178c2ecf20Sopenharmony_ci goto fail; 55188c2ecf20Sopenharmony_ci } 55198c2ecf20Sopenharmony_ci 55208c2ecf20Sopenharmony_ci if (!conn_prepare_command(connection, sock)) { 55218c2ecf20Sopenharmony_ci rv = 0; 55228c2ecf20Sopenharmony_ci goto fail; 55238c2ecf20Sopenharmony_ci } 55248c2ecf20Sopenharmony_ci rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0, 55258c2ecf20Sopenharmony_ci response, resp_size); 55268c2ecf20Sopenharmony_ci if (!rv) 55278c2ecf20Sopenharmony_ci goto fail; 55288c2ecf20Sopenharmony_ci 55298c2ecf20Sopenharmony_ci err = drbd_recv_header(connection, &pi); 55308c2ecf20Sopenharmony_ci if (err) { 55318c2ecf20Sopenharmony_ci rv = 0; 55328c2ecf20Sopenharmony_ci goto fail; 55338c2ecf20Sopenharmony_ci } 55348c2ecf20Sopenharmony_ci 
55358c2ecf20Sopenharmony_ci if (pi.cmd != P_AUTH_RESPONSE) { 55368c2ecf20Sopenharmony_ci drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n", 55378c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 55388c2ecf20Sopenharmony_ci rv = 0; 55398c2ecf20Sopenharmony_ci goto fail; 55408c2ecf20Sopenharmony_ci } 55418c2ecf20Sopenharmony_ci 55428c2ecf20Sopenharmony_ci if (pi.size != resp_size) { 55438c2ecf20Sopenharmony_ci drbd_err(connection, "expected AuthResponse payload of wrong size\n"); 55448c2ecf20Sopenharmony_ci rv = 0; 55458c2ecf20Sopenharmony_ci goto fail; 55468c2ecf20Sopenharmony_ci } 55478c2ecf20Sopenharmony_ci 55488c2ecf20Sopenharmony_ci err = drbd_recv_all_warn(connection, response , resp_size); 55498c2ecf20Sopenharmony_ci if (err) { 55508c2ecf20Sopenharmony_ci rv = 0; 55518c2ecf20Sopenharmony_ci goto fail; 55528c2ecf20Sopenharmony_ci } 55538c2ecf20Sopenharmony_ci 55548c2ecf20Sopenharmony_ci right_response = kmalloc(resp_size, GFP_NOIO); 55558c2ecf20Sopenharmony_ci if (right_response == NULL) { 55568c2ecf20Sopenharmony_ci drbd_err(connection, "kmalloc of right_response failed\n"); 55578c2ecf20Sopenharmony_ci rv = -1; 55588c2ecf20Sopenharmony_ci goto fail; 55598c2ecf20Sopenharmony_ci } 55608c2ecf20Sopenharmony_ci 55618c2ecf20Sopenharmony_ci rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN, 55628c2ecf20Sopenharmony_ci right_response); 55638c2ecf20Sopenharmony_ci if (rv) { 55648c2ecf20Sopenharmony_ci drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 55658c2ecf20Sopenharmony_ci rv = -1; 55668c2ecf20Sopenharmony_ci goto fail; 55678c2ecf20Sopenharmony_ci } 55688c2ecf20Sopenharmony_ci 55698c2ecf20Sopenharmony_ci rv = !memcmp(response, right_response, resp_size); 55708c2ecf20Sopenharmony_ci 55718c2ecf20Sopenharmony_ci if (rv) 55728c2ecf20Sopenharmony_ci drbd_info(connection, "Peer authenticated using %d bytes HMAC\n", 55738c2ecf20Sopenharmony_ci resp_size); 55748c2ecf20Sopenharmony_ci else 55758c2ecf20Sopenharmony_ci rv = 
-1; 55768c2ecf20Sopenharmony_ci 55778c2ecf20Sopenharmony_ci fail: 55788c2ecf20Sopenharmony_ci kfree(peers_ch); 55798c2ecf20Sopenharmony_ci kfree(response); 55808c2ecf20Sopenharmony_ci kfree(right_response); 55818c2ecf20Sopenharmony_ci if (desc) { 55828c2ecf20Sopenharmony_ci shash_desc_zero(desc); 55838c2ecf20Sopenharmony_ci kfree(desc); 55848c2ecf20Sopenharmony_ci } 55858c2ecf20Sopenharmony_ci 55868c2ecf20Sopenharmony_ci return rv; 55878c2ecf20Sopenharmony_ci} 55888c2ecf20Sopenharmony_ci#endif 55898c2ecf20Sopenharmony_ci 55908c2ecf20Sopenharmony_ciint drbd_receiver(struct drbd_thread *thi) 55918c2ecf20Sopenharmony_ci{ 55928c2ecf20Sopenharmony_ci struct drbd_connection *connection = thi->connection; 55938c2ecf20Sopenharmony_ci int h; 55948c2ecf20Sopenharmony_ci 55958c2ecf20Sopenharmony_ci drbd_info(connection, "receiver (re)started\n"); 55968c2ecf20Sopenharmony_ci 55978c2ecf20Sopenharmony_ci do { 55988c2ecf20Sopenharmony_ci h = conn_connect(connection); 55998c2ecf20Sopenharmony_ci if (h == 0) { 56008c2ecf20Sopenharmony_ci conn_disconnect(connection); 56018c2ecf20Sopenharmony_ci schedule_timeout_interruptible(HZ); 56028c2ecf20Sopenharmony_ci } 56038c2ecf20Sopenharmony_ci if (h == -1) { 56048c2ecf20Sopenharmony_ci drbd_warn(connection, "Discarding network configuration.\n"); 56058c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 56068c2ecf20Sopenharmony_ci } 56078c2ecf20Sopenharmony_ci } while (h == 0); 56088c2ecf20Sopenharmony_ci 56098c2ecf20Sopenharmony_ci if (h > 0) { 56108c2ecf20Sopenharmony_ci blk_start_plug(&connection->receiver_plug); 56118c2ecf20Sopenharmony_ci drbdd(connection); 56128c2ecf20Sopenharmony_ci blk_finish_plug(&connection->receiver_plug); 56138c2ecf20Sopenharmony_ci } 56148c2ecf20Sopenharmony_ci 56158c2ecf20Sopenharmony_ci conn_disconnect(connection); 56168c2ecf20Sopenharmony_ci 56178c2ecf20Sopenharmony_ci drbd_info(connection, "receiver terminated\n"); 56188c2ecf20Sopenharmony_ci return 0; 
56198c2ecf20Sopenharmony_ci} 56208c2ecf20Sopenharmony_ci 56218c2ecf20Sopenharmony_ci/* ********* acknowledge sender ******** */ 56228c2ecf20Sopenharmony_ci 56238c2ecf20Sopenharmony_cistatic int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 56248c2ecf20Sopenharmony_ci{ 56258c2ecf20Sopenharmony_ci struct p_req_state_reply *p = pi->data; 56268c2ecf20Sopenharmony_ci int retcode = be32_to_cpu(p->retcode); 56278c2ecf20Sopenharmony_ci 56288c2ecf20Sopenharmony_ci if (retcode >= SS_SUCCESS) { 56298c2ecf20Sopenharmony_ci set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags); 56308c2ecf20Sopenharmony_ci } else { 56318c2ecf20Sopenharmony_ci set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags); 56328c2ecf20Sopenharmony_ci drbd_err(connection, "Requested state change failed by peer: %s (%d)\n", 56338c2ecf20Sopenharmony_ci drbd_set_st_err_str(retcode), retcode); 56348c2ecf20Sopenharmony_ci } 56358c2ecf20Sopenharmony_ci wake_up(&connection->ping_wait); 56368c2ecf20Sopenharmony_ci 56378c2ecf20Sopenharmony_ci return 0; 56388c2ecf20Sopenharmony_ci} 56398c2ecf20Sopenharmony_ci 56408c2ecf20Sopenharmony_cistatic int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 56418c2ecf20Sopenharmony_ci{ 56428c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 56438c2ecf20Sopenharmony_ci struct drbd_device *device; 56448c2ecf20Sopenharmony_ci struct p_req_state_reply *p = pi->data; 56458c2ecf20Sopenharmony_ci int retcode = be32_to_cpu(p->retcode); 56468c2ecf20Sopenharmony_ci 56478c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 56488c2ecf20Sopenharmony_ci if (!peer_device) 56498c2ecf20Sopenharmony_ci return -EIO; 56508c2ecf20Sopenharmony_ci device = peer_device->device; 56518c2ecf20Sopenharmony_ci 56528c2ecf20Sopenharmony_ci if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) { 56538c2ecf20Sopenharmony_ci D_ASSERT(device, connection->agreed_pro_version < 100); 56548c2ecf20Sopenharmony_ci return 
got_conn_RqSReply(connection, pi); 56558c2ecf20Sopenharmony_ci } 56568c2ecf20Sopenharmony_ci 56578c2ecf20Sopenharmony_ci if (retcode >= SS_SUCCESS) { 56588c2ecf20Sopenharmony_ci set_bit(CL_ST_CHG_SUCCESS, &device->flags); 56598c2ecf20Sopenharmony_ci } else { 56608c2ecf20Sopenharmony_ci set_bit(CL_ST_CHG_FAIL, &device->flags); 56618c2ecf20Sopenharmony_ci drbd_err(device, "Requested state change failed by peer: %s (%d)\n", 56628c2ecf20Sopenharmony_ci drbd_set_st_err_str(retcode), retcode); 56638c2ecf20Sopenharmony_ci } 56648c2ecf20Sopenharmony_ci wake_up(&device->state_wait); 56658c2ecf20Sopenharmony_ci 56668c2ecf20Sopenharmony_ci return 0; 56678c2ecf20Sopenharmony_ci} 56688c2ecf20Sopenharmony_ci 56698c2ecf20Sopenharmony_cistatic int got_Ping(struct drbd_connection *connection, struct packet_info *pi) 56708c2ecf20Sopenharmony_ci{ 56718c2ecf20Sopenharmony_ci return drbd_send_ping_ack(connection); 56728c2ecf20Sopenharmony_ci 56738c2ecf20Sopenharmony_ci} 56748c2ecf20Sopenharmony_ci 56758c2ecf20Sopenharmony_cistatic int got_PingAck(struct drbd_connection *connection, struct packet_info *pi) 56768c2ecf20Sopenharmony_ci{ 56778c2ecf20Sopenharmony_ci /* restore idle timeout */ 56788c2ecf20Sopenharmony_ci connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ; 56798c2ecf20Sopenharmony_ci if (!test_and_set_bit(GOT_PING_ACK, &connection->flags)) 56808c2ecf20Sopenharmony_ci wake_up(&connection->ping_wait); 56818c2ecf20Sopenharmony_ci 56828c2ecf20Sopenharmony_ci return 0; 56838c2ecf20Sopenharmony_ci} 56848c2ecf20Sopenharmony_ci 56858c2ecf20Sopenharmony_cistatic int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi) 56868c2ecf20Sopenharmony_ci{ 56878c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 56888c2ecf20Sopenharmony_ci struct drbd_device *device; 56898c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 56908c2ecf20Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 56918c2ecf20Sopenharmony_ci int blksize = 
be32_to_cpu(p->blksize); 56928c2ecf20Sopenharmony_ci 56938c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 56948c2ecf20Sopenharmony_ci if (!peer_device) 56958c2ecf20Sopenharmony_ci return -EIO; 56968c2ecf20Sopenharmony_ci device = peer_device->device; 56978c2ecf20Sopenharmony_ci 56988c2ecf20Sopenharmony_ci D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 56998c2ecf20Sopenharmony_ci 57008c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 57018c2ecf20Sopenharmony_ci 57028c2ecf20Sopenharmony_ci if (get_ldev(device)) { 57038c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 57048c2ecf20Sopenharmony_ci drbd_set_in_sync(device, sector, blksize); 57058c2ecf20Sopenharmony_ci /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 57068c2ecf20Sopenharmony_ci device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 57078c2ecf20Sopenharmony_ci put_ldev(device); 57088c2ecf20Sopenharmony_ci } 57098c2ecf20Sopenharmony_ci dec_rs_pending(device); 57108c2ecf20Sopenharmony_ci atomic_add(blksize >> 9, &device->rs_sect_in); 57118c2ecf20Sopenharmony_ci 57128c2ecf20Sopenharmony_ci return 0; 57138c2ecf20Sopenharmony_ci} 57148c2ecf20Sopenharmony_ci 57158c2ecf20Sopenharmony_cistatic int 57168c2ecf20Sopenharmony_civalidate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, 57178c2ecf20Sopenharmony_ci struct rb_root *root, const char *func, 57188c2ecf20Sopenharmony_ci enum drbd_req_event what, bool missing_ok) 57198c2ecf20Sopenharmony_ci{ 57208c2ecf20Sopenharmony_ci struct drbd_request *req; 57218c2ecf20Sopenharmony_ci struct bio_and_error m; 57228c2ecf20Sopenharmony_ci 57238c2ecf20Sopenharmony_ci spin_lock_irq(&device->resource->req_lock); 57248c2ecf20Sopenharmony_ci req = find_request(device, root, id, sector, missing_ok, func); 57258c2ecf20Sopenharmony_ci if (unlikely(!req)) { 57268c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 57278c2ecf20Sopenharmony_ci 
return -EIO; 57288c2ecf20Sopenharmony_ci } 57298c2ecf20Sopenharmony_ci __req_mod(req, what, &m); 57308c2ecf20Sopenharmony_ci spin_unlock_irq(&device->resource->req_lock); 57318c2ecf20Sopenharmony_ci 57328c2ecf20Sopenharmony_ci if (m.bio) 57338c2ecf20Sopenharmony_ci complete_master_bio(device, &m); 57348c2ecf20Sopenharmony_ci return 0; 57358c2ecf20Sopenharmony_ci} 57368c2ecf20Sopenharmony_ci 57378c2ecf20Sopenharmony_cistatic int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi) 57388c2ecf20Sopenharmony_ci{ 57398c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 57408c2ecf20Sopenharmony_ci struct drbd_device *device; 57418c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 57428c2ecf20Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 57438c2ecf20Sopenharmony_ci int blksize = be32_to_cpu(p->blksize); 57448c2ecf20Sopenharmony_ci enum drbd_req_event what; 57458c2ecf20Sopenharmony_ci 57468c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 57478c2ecf20Sopenharmony_ci if (!peer_device) 57488c2ecf20Sopenharmony_ci return -EIO; 57498c2ecf20Sopenharmony_ci device = peer_device->device; 57508c2ecf20Sopenharmony_ci 57518c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 57528c2ecf20Sopenharmony_ci 57538c2ecf20Sopenharmony_ci if (p->block_id == ID_SYNCER) { 57548c2ecf20Sopenharmony_ci drbd_set_in_sync(device, sector, blksize); 57558c2ecf20Sopenharmony_ci dec_rs_pending(device); 57568c2ecf20Sopenharmony_ci return 0; 57578c2ecf20Sopenharmony_ci } 57588c2ecf20Sopenharmony_ci switch (pi->cmd) { 57598c2ecf20Sopenharmony_ci case P_RS_WRITE_ACK: 57608c2ecf20Sopenharmony_ci what = WRITE_ACKED_BY_PEER_AND_SIS; 57618c2ecf20Sopenharmony_ci break; 57628c2ecf20Sopenharmony_ci case P_WRITE_ACK: 57638c2ecf20Sopenharmony_ci what = WRITE_ACKED_BY_PEER; 57648c2ecf20Sopenharmony_ci break; 57658c2ecf20Sopenharmony_ci case P_RECV_ACK: 57668c2ecf20Sopenharmony_ci what = RECV_ACKED_BY_PEER; 
57678c2ecf20Sopenharmony_ci break; 57688c2ecf20Sopenharmony_ci case P_SUPERSEDED: 57698c2ecf20Sopenharmony_ci what = CONFLICT_RESOLVED; 57708c2ecf20Sopenharmony_ci break; 57718c2ecf20Sopenharmony_ci case P_RETRY_WRITE: 57728c2ecf20Sopenharmony_ci what = POSTPONE_WRITE; 57738c2ecf20Sopenharmony_ci break; 57748c2ecf20Sopenharmony_ci default: 57758c2ecf20Sopenharmony_ci BUG(); 57768c2ecf20Sopenharmony_ci } 57778c2ecf20Sopenharmony_ci 57788c2ecf20Sopenharmony_ci return validate_req_change_req_state(device, p->block_id, sector, 57798c2ecf20Sopenharmony_ci &device->write_requests, __func__, 57808c2ecf20Sopenharmony_ci what, false); 57818c2ecf20Sopenharmony_ci} 57828c2ecf20Sopenharmony_ci 57838c2ecf20Sopenharmony_cistatic int got_NegAck(struct drbd_connection *connection, struct packet_info *pi) 57848c2ecf20Sopenharmony_ci{ 57858c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 57868c2ecf20Sopenharmony_ci struct drbd_device *device; 57878c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 57888c2ecf20Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 57898c2ecf20Sopenharmony_ci int size = be32_to_cpu(p->blksize); 57908c2ecf20Sopenharmony_ci int err; 57918c2ecf20Sopenharmony_ci 57928c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 57938c2ecf20Sopenharmony_ci if (!peer_device) 57948c2ecf20Sopenharmony_ci return -EIO; 57958c2ecf20Sopenharmony_ci device = peer_device->device; 57968c2ecf20Sopenharmony_ci 57978c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 57988c2ecf20Sopenharmony_ci 57998c2ecf20Sopenharmony_ci if (p->block_id == ID_SYNCER) { 58008c2ecf20Sopenharmony_ci dec_rs_pending(device); 58018c2ecf20Sopenharmony_ci drbd_rs_failed_io(device, sector, size); 58028c2ecf20Sopenharmony_ci return 0; 58038c2ecf20Sopenharmony_ci } 58048c2ecf20Sopenharmony_ci 58058c2ecf20Sopenharmony_ci err = validate_req_change_req_state(device, p->block_id, sector, 58068c2ecf20Sopenharmony_ci &device->write_requests, 
__func__, 58078c2ecf20Sopenharmony_ci NEG_ACKED, true); 58088c2ecf20Sopenharmony_ci if (err) { 58098c2ecf20Sopenharmony_ci /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 58108c2ecf20Sopenharmony_ci The master bio might already be completed, therefore the 58118c2ecf20Sopenharmony_ci request is no longer in the collision hash. */ 58128c2ecf20Sopenharmony_ci /* In Protocol B we might already have got a P_RECV_ACK 58138c2ecf20Sopenharmony_ci but then get a P_NEG_ACK afterwards. */ 58148c2ecf20Sopenharmony_ci drbd_set_out_of_sync(device, sector, size); 58158c2ecf20Sopenharmony_ci } 58168c2ecf20Sopenharmony_ci return 0; 58178c2ecf20Sopenharmony_ci} 58188c2ecf20Sopenharmony_ci 58198c2ecf20Sopenharmony_cistatic int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) 58208c2ecf20Sopenharmony_ci{ 58218c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 58228c2ecf20Sopenharmony_ci struct drbd_device *device; 58238c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 58248c2ecf20Sopenharmony_ci sector_t sector = be64_to_cpu(p->sector); 58258c2ecf20Sopenharmony_ci 58268c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 58278c2ecf20Sopenharmony_ci if (!peer_device) 58288c2ecf20Sopenharmony_ci return -EIO; 58298c2ecf20Sopenharmony_ci device = peer_device->device; 58308c2ecf20Sopenharmony_ci 58318c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 58328c2ecf20Sopenharmony_ci 58338c2ecf20Sopenharmony_ci drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", 58348c2ecf20Sopenharmony_ci (unsigned long long)sector, be32_to_cpu(p->blksize)); 58358c2ecf20Sopenharmony_ci 58368c2ecf20Sopenharmony_ci return validate_req_change_req_state(device, p->block_id, sector, 58378c2ecf20Sopenharmony_ci &device->read_requests, __func__, 58388c2ecf20Sopenharmony_ci NEG_ACKED, false); 58398c2ecf20Sopenharmony_ci} 58408c2ecf20Sopenharmony_ci 58418c2ecf20Sopenharmony_cistatic int got_NegRSDReply(struct 
drbd_connection *connection, struct packet_info *pi) 58428c2ecf20Sopenharmony_ci{ 58438c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 58448c2ecf20Sopenharmony_ci struct drbd_device *device; 58458c2ecf20Sopenharmony_ci sector_t sector; 58468c2ecf20Sopenharmony_ci int size; 58478c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 58488c2ecf20Sopenharmony_ci 58498c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 58508c2ecf20Sopenharmony_ci if (!peer_device) 58518c2ecf20Sopenharmony_ci return -EIO; 58528c2ecf20Sopenharmony_ci device = peer_device->device; 58538c2ecf20Sopenharmony_ci 58548c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 58558c2ecf20Sopenharmony_ci size = be32_to_cpu(p->blksize); 58568c2ecf20Sopenharmony_ci 58578c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 58588c2ecf20Sopenharmony_ci 58598c2ecf20Sopenharmony_ci dec_rs_pending(device); 58608c2ecf20Sopenharmony_ci 58618c2ecf20Sopenharmony_ci if (get_ldev_if_state(device, D_FAILED)) { 58628c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 58638c2ecf20Sopenharmony_ci switch (pi->cmd) { 58648c2ecf20Sopenharmony_ci case P_NEG_RS_DREPLY: 58658c2ecf20Sopenharmony_ci drbd_rs_failed_io(device, sector, size); 58668c2ecf20Sopenharmony_ci case P_RS_CANCEL: 58678c2ecf20Sopenharmony_ci break; 58688c2ecf20Sopenharmony_ci default: 58698c2ecf20Sopenharmony_ci BUG(); 58708c2ecf20Sopenharmony_ci } 58718c2ecf20Sopenharmony_ci put_ldev(device); 58728c2ecf20Sopenharmony_ci } 58738c2ecf20Sopenharmony_ci 58748c2ecf20Sopenharmony_ci return 0; 58758c2ecf20Sopenharmony_ci} 58768c2ecf20Sopenharmony_ci 58778c2ecf20Sopenharmony_cistatic int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) 58788c2ecf20Sopenharmony_ci{ 58798c2ecf20Sopenharmony_ci struct p_barrier_ack *p = pi->data; 58808c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 58818c2ecf20Sopenharmony_ci int vnr; 58828c2ecf20Sopenharmony_ci 
58838c2ecf20Sopenharmony_ci tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); 58848c2ecf20Sopenharmony_ci 58858c2ecf20Sopenharmony_ci rcu_read_lock(); 58868c2ecf20Sopenharmony_ci idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 58878c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 58888c2ecf20Sopenharmony_ci 58898c2ecf20Sopenharmony_ci if (device->state.conn == C_AHEAD && 58908c2ecf20Sopenharmony_ci atomic_read(&device->ap_in_flight) == 0 && 58918c2ecf20Sopenharmony_ci !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { 58928c2ecf20Sopenharmony_ci device->start_resync_timer.expires = jiffies + HZ; 58938c2ecf20Sopenharmony_ci add_timer(&device->start_resync_timer); 58948c2ecf20Sopenharmony_ci } 58958c2ecf20Sopenharmony_ci } 58968c2ecf20Sopenharmony_ci rcu_read_unlock(); 58978c2ecf20Sopenharmony_ci 58988c2ecf20Sopenharmony_ci return 0; 58998c2ecf20Sopenharmony_ci} 59008c2ecf20Sopenharmony_ci 59018c2ecf20Sopenharmony_cistatic int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) 59028c2ecf20Sopenharmony_ci{ 59038c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device; 59048c2ecf20Sopenharmony_ci struct drbd_device *device; 59058c2ecf20Sopenharmony_ci struct p_block_ack *p = pi->data; 59068c2ecf20Sopenharmony_ci struct drbd_device_work *dw; 59078c2ecf20Sopenharmony_ci sector_t sector; 59088c2ecf20Sopenharmony_ci int size; 59098c2ecf20Sopenharmony_ci 59108c2ecf20Sopenharmony_ci peer_device = conn_peer_device(connection, pi->vnr); 59118c2ecf20Sopenharmony_ci if (!peer_device) 59128c2ecf20Sopenharmony_ci return -EIO; 59138c2ecf20Sopenharmony_ci device = peer_device->device; 59148c2ecf20Sopenharmony_ci 59158c2ecf20Sopenharmony_ci sector = be64_to_cpu(p->sector); 59168c2ecf20Sopenharmony_ci size = be32_to_cpu(p->blksize); 59178c2ecf20Sopenharmony_ci 59188c2ecf20Sopenharmony_ci update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 59198c2ecf20Sopenharmony_ci 59208c2ecf20Sopenharmony_ci if 
(be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) 59218c2ecf20Sopenharmony_ci drbd_ov_out_of_sync_found(device, sector, size); 59228c2ecf20Sopenharmony_ci else 59238c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 59248c2ecf20Sopenharmony_ci 59258c2ecf20Sopenharmony_ci if (!get_ldev(device)) 59268c2ecf20Sopenharmony_ci return 0; 59278c2ecf20Sopenharmony_ci 59288c2ecf20Sopenharmony_ci drbd_rs_complete_io(device, sector); 59298c2ecf20Sopenharmony_ci dec_rs_pending(device); 59308c2ecf20Sopenharmony_ci 59318c2ecf20Sopenharmony_ci --device->ov_left; 59328c2ecf20Sopenharmony_ci 59338c2ecf20Sopenharmony_ci /* let's advance progress step marks only for every other megabyte */ 59348c2ecf20Sopenharmony_ci if ((device->ov_left & 0x200) == 0x200) 59358c2ecf20Sopenharmony_ci drbd_advance_rs_marks(device, device->ov_left); 59368c2ecf20Sopenharmony_ci 59378c2ecf20Sopenharmony_ci if (device->ov_left == 0) { 59388c2ecf20Sopenharmony_ci dw = kmalloc(sizeof(*dw), GFP_NOIO); 59398c2ecf20Sopenharmony_ci if (dw) { 59408c2ecf20Sopenharmony_ci dw->w.cb = w_ov_finished; 59418c2ecf20Sopenharmony_ci dw->device = device; 59428c2ecf20Sopenharmony_ci drbd_queue_work(&peer_device->connection->sender_work, &dw->w); 59438c2ecf20Sopenharmony_ci } else { 59448c2ecf20Sopenharmony_ci drbd_err(device, "kmalloc(dw) failed."); 59458c2ecf20Sopenharmony_ci ov_out_of_sync_print(device); 59468c2ecf20Sopenharmony_ci drbd_resync_finished(device); 59478c2ecf20Sopenharmony_ci } 59488c2ecf20Sopenharmony_ci } 59498c2ecf20Sopenharmony_ci put_ldev(device); 59508c2ecf20Sopenharmony_ci return 0; 59518c2ecf20Sopenharmony_ci} 59528c2ecf20Sopenharmony_ci 59538c2ecf20Sopenharmony_cistatic int got_skip(struct drbd_connection *connection, struct packet_info *pi) 59548c2ecf20Sopenharmony_ci{ 59558c2ecf20Sopenharmony_ci return 0; 59568c2ecf20Sopenharmony_ci} 59578c2ecf20Sopenharmony_ci 59588c2ecf20Sopenharmony_cistruct meta_sock_cmd { 59598c2ecf20Sopenharmony_ci size_t pkt_size; 59608c2ecf20Sopenharmony_ci int (*fn)(struct 
drbd_connection *connection, struct packet_info *); 59618c2ecf20Sopenharmony_ci}; 59628c2ecf20Sopenharmony_ci 59638c2ecf20Sopenharmony_cistatic void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout) 59648c2ecf20Sopenharmony_ci{ 59658c2ecf20Sopenharmony_ci long t; 59668c2ecf20Sopenharmony_ci struct net_conf *nc; 59678c2ecf20Sopenharmony_ci 59688c2ecf20Sopenharmony_ci rcu_read_lock(); 59698c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 59708c2ecf20Sopenharmony_ci t = ping_timeout ? nc->ping_timeo : nc->ping_int; 59718c2ecf20Sopenharmony_ci rcu_read_unlock(); 59728c2ecf20Sopenharmony_ci 59738c2ecf20Sopenharmony_ci t *= HZ; 59748c2ecf20Sopenharmony_ci if (ping_timeout) 59758c2ecf20Sopenharmony_ci t /= 10; 59768c2ecf20Sopenharmony_ci 59778c2ecf20Sopenharmony_ci connection->meta.socket->sk->sk_rcvtimeo = t; 59788c2ecf20Sopenharmony_ci} 59798c2ecf20Sopenharmony_ci 59808c2ecf20Sopenharmony_cistatic void set_ping_timeout(struct drbd_connection *connection) 59818c2ecf20Sopenharmony_ci{ 59828c2ecf20Sopenharmony_ci set_rcvtimeo(connection, 1); 59838c2ecf20Sopenharmony_ci} 59848c2ecf20Sopenharmony_ci 59858c2ecf20Sopenharmony_cistatic void set_idle_timeout(struct drbd_connection *connection) 59868c2ecf20Sopenharmony_ci{ 59878c2ecf20Sopenharmony_ci set_rcvtimeo(connection, 0); 59888c2ecf20Sopenharmony_ci} 59898c2ecf20Sopenharmony_ci 59908c2ecf20Sopenharmony_cistatic struct meta_sock_cmd ack_receiver_tbl[] = { 59918c2ecf20Sopenharmony_ci [P_PING] = { 0, got_Ping }, 59928c2ecf20Sopenharmony_ci [P_PING_ACK] = { 0, got_PingAck }, 59938c2ecf20Sopenharmony_ci [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 59948c2ecf20Sopenharmony_ci [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 59958c2ecf20Sopenharmony_ci [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 59968c2ecf20Sopenharmony_ci [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck }, 59978c2ecf20Sopenharmony_ci [P_NEG_ACK] = { sizeof(struct 
p_block_ack), got_NegAck }, 59988c2ecf20Sopenharmony_ci [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 59998c2ecf20Sopenharmony_ci [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, 60008c2ecf20Sopenharmony_ci [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 60018c2ecf20Sopenharmony_ci [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 60028c2ecf20Sopenharmony_ci [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 60038c2ecf20Sopenharmony_ci [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 60048c2ecf20Sopenharmony_ci [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 60058c2ecf20Sopenharmony_ci [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, 60068c2ecf20Sopenharmony_ci [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, 60078c2ecf20Sopenharmony_ci [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, 60088c2ecf20Sopenharmony_ci}; 60098c2ecf20Sopenharmony_ci 60108c2ecf20Sopenharmony_ciint drbd_ack_receiver(struct drbd_thread *thi) 60118c2ecf20Sopenharmony_ci{ 60128c2ecf20Sopenharmony_ci struct drbd_connection *connection = thi->connection; 60138c2ecf20Sopenharmony_ci struct meta_sock_cmd *cmd = NULL; 60148c2ecf20Sopenharmony_ci struct packet_info pi; 60158c2ecf20Sopenharmony_ci unsigned long pre_recv_jif; 60168c2ecf20Sopenharmony_ci int rv; 60178c2ecf20Sopenharmony_ci void *buf = connection->meta.rbuf; 60188c2ecf20Sopenharmony_ci int received = 0; 60198c2ecf20Sopenharmony_ci unsigned int header_size = drbd_header_size(connection); 60208c2ecf20Sopenharmony_ci int expect = header_size; 60218c2ecf20Sopenharmony_ci bool ping_timeout_active = false; 60228c2ecf20Sopenharmony_ci 60238c2ecf20Sopenharmony_ci sched_set_fifo_low(current); 60248c2ecf20Sopenharmony_ci 60258c2ecf20Sopenharmony_ci while (get_t_state(thi) == RUNNING) { 60268c2ecf20Sopenharmony_ci drbd_thread_current_set_cpu(thi); 
60278c2ecf20Sopenharmony_ci 60288c2ecf20Sopenharmony_ci conn_reclaim_net_peer_reqs(connection); 60298c2ecf20Sopenharmony_ci 60308c2ecf20Sopenharmony_ci if (test_and_clear_bit(SEND_PING, &connection->flags)) { 60318c2ecf20Sopenharmony_ci if (drbd_send_ping(connection)) { 60328c2ecf20Sopenharmony_ci drbd_err(connection, "drbd_send_ping has failed\n"); 60338c2ecf20Sopenharmony_ci goto reconnect; 60348c2ecf20Sopenharmony_ci } 60358c2ecf20Sopenharmony_ci set_ping_timeout(connection); 60368c2ecf20Sopenharmony_ci ping_timeout_active = true; 60378c2ecf20Sopenharmony_ci } 60388c2ecf20Sopenharmony_ci 60398c2ecf20Sopenharmony_ci pre_recv_jif = jiffies; 60408c2ecf20Sopenharmony_ci rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); 60418c2ecf20Sopenharmony_ci 60428c2ecf20Sopenharmony_ci /* Note: 60438c2ecf20Sopenharmony_ci * -EINTR (on meta) we got a signal 60448c2ecf20Sopenharmony_ci * -EAGAIN (on meta) rcvtimeo expired 60458c2ecf20Sopenharmony_ci * -ECONNRESET other side closed the connection 60468c2ecf20Sopenharmony_ci * -ERESTARTSYS (on data) we got a signal 60478c2ecf20Sopenharmony_ci * rv < 0 other than above: unexpected error! 
60488c2ecf20Sopenharmony_ci * rv == expected: full header or command 60498c2ecf20Sopenharmony_ci * rv < expected: "woken" by signal during receive 60508c2ecf20Sopenharmony_ci * rv == 0 : "connection shut down by peer" 60518c2ecf20Sopenharmony_ci */ 60528c2ecf20Sopenharmony_ci if (likely(rv > 0)) { 60538c2ecf20Sopenharmony_ci received += rv; 60548c2ecf20Sopenharmony_ci buf += rv; 60558c2ecf20Sopenharmony_ci } else if (rv == 0) { 60568c2ecf20Sopenharmony_ci if (test_bit(DISCONNECT_SENT, &connection->flags)) { 60578c2ecf20Sopenharmony_ci long t; 60588c2ecf20Sopenharmony_ci rcu_read_lock(); 60598c2ecf20Sopenharmony_ci t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 60608c2ecf20Sopenharmony_ci rcu_read_unlock(); 60618c2ecf20Sopenharmony_ci 60628c2ecf20Sopenharmony_ci t = wait_event_timeout(connection->ping_wait, 60638c2ecf20Sopenharmony_ci connection->cstate < C_WF_REPORT_PARAMS, 60648c2ecf20Sopenharmony_ci t); 60658c2ecf20Sopenharmony_ci if (t) 60668c2ecf20Sopenharmony_ci break; 60678c2ecf20Sopenharmony_ci } 60688c2ecf20Sopenharmony_ci drbd_err(connection, "meta connection shut down by peer.\n"); 60698c2ecf20Sopenharmony_ci goto reconnect; 60708c2ecf20Sopenharmony_ci } else if (rv == -EAGAIN) { 60718c2ecf20Sopenharmony_ci /* If the data socket received something meanwhile, 60728c2ecf20Sopenharmony_ci * that is good enough: peer is still alive. */ 60738c2ecf20Sopenharmony_ci if (time_after(connection->last_received, pre_recv_jif)) 60748c2ecf20Sopenharmony_ci continue; 60758c2ecf20Sopenharmony_ci if (ping_timeout_active) { 60768c2ecf20Sopenharmony_ci drbd_err(connection, "PingAck did not arrive in time.\n"); 60778c2ecf20Sopenharmony_ci goto reconnect; 60788c2ecf20Sopenharmony_ci } 60798c2ecf20Sopenharmony_ci set_bit(SEND_PING, &connection->flags); 60808c2ecf20Sopenharmony_ci continue; 60818c2ecf20Sopenharmony_ci } else if (rv == -EINTR) { 60828c2ecf20Sopenharmony_ci /* maybe drbd_thread_stop(): the while condition will notice. 
60838c2ecf20Sopenharmony_ci * maybe woken for send_ping: we'll send a ping above, 60848c2ecf20Sopenharmony_ci * and change the rcvtimeo */ 60858c2ecf20Sopenharmony_ci flush_signals(current); 60868c2ecf20Sopenharmony_ci continue; 60878c2ecf20Sopenharmony_ci } else { 60888c2ecf20Sopenharmony_ci drbd_err(connection, "sock_recvmsg returned %d\n", rv); 60898c2ecf20Sopenharmony_ci goto reconnect; 60908c2ecf20Sopenharmony_ci } 60918c2ecf20Sopenharmony_ci 60928c2ecf20Sopenharmony_ci if (received == expect && cmd == NULL) { 60938c2ecf20Sopenharmony_ci if (decode_header(connection, connection->meta.rbuf, &pi)) 60948c2ecf20Sopenharmony_ci goto reconnect; 60958c2ecf20Sopenharmony_ci cmd = &ack_receiver_tbl[pi.cmd]; 60968c2ecf20Sopenharmony_ci if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) { 60978c2ecf20Sopenharmony_ci drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", 60988c2ecf20Sopenharmony_ci cmdname(pi.cmd), pi.cmd); 60998c2ecf20Sopenharmony_ci goto disconnect; 61008c2ecf20Sopenharmony_ci } 61018c2ecf20Sopenharmony_ci expect = header_size + cmd->pkt_size; 61028c2ecf20Sopenharmony_ci if (pi.size != expect - header_size) { 61038c2ecf20Sopenharmony_ci drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", 61048c2ecf20Sopenharmony_ci pi.cmd, pi.size); 61058c2ecf20Sopenharmony_ci goto reconnect; 61068c2ecf20Sopenharmony_ci } 61078c2ecf20Sopenharmony_ci } 61088c2ecf20Sopenharmony_ci if (received == expect) { 61098c2ecf20Sopenharmony_ci bool err; 61108c2ecf20Sopenharmony_ci 61118c2ecf20Sopenharmony_ci err = cmd->fn(connection, &pi); 61128c2ecf20Sopenharmony_ci if (err) { 61138c2ecf20Sopenharmony_ci drbd_err(connection, "%ps failed\n", cmd->fn); 61148c2ecf20Sopenharmony_ci goto reconnect; 61158c2ecf20Sopenharmony_ci } 61168c2ecf20Sopenharmony_ci 61178c2ecf20Sopenharmony_ci connection->last_received = jiffies; 61188c2ecf20Sopenharmony_ci 61198c2ecf20Sopenharmony_ci if (cmd == &ack_receiver_tbl[P_PING_ACK]) { 61208c2ecf20Sopenharmony_ci 
set_idle_timeout(connection); 61218c2ecf20Sopenharmony_ci ping_timeout_active = false; 61228c2ecf20Sopenharmony_ci } 61238c2ecf20Sopenharmony_ci 61248c2ecf20Sopenharmony_ci buf = connection->meta.rbuf; 61258c2ecf20Sopenharmony_ci received = 0; 61268c2ecf20Sopenharmony_ci expect = header_size; 61278c2ecf20Sopenharmony_ci cmd = NULL; 61288c2ecf20Sopenharmony_ci } 61298c2ecf20Sopenharmony_ci } 61308c2ecf20Sopenharmony_ci 61318c2ecf20Sopenharmony_ci if (0) { 61328c2ecf20Sopenharmony_cireconnect: 61338c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 61348c2ecf20Sopenharmony_ci conn_md_sync(connection); 61358c2ecf20Sopenharmony_ci } 61368c2ecf20Sopenharmony_ci if (0) { 61378c2ecf20Sopenharmony_cidisconnect: 61388c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 61398c2ecf20Sopenharmony_ci } 61408c2ecf20Sopenharmony_ci 61418c2ecf20Sopenharmony_ci drbd_info(connection, "ack_receiver terminated\n"); 61428c2ecf20Sopenharmony_ci 61438c2ecf20Sopenharmony_ci return 0; 61448c2ecf20Sopenharmony_ci} 61458c2ecf20Sopenharmony_ci 61468c2ecf20Sopenharmony_civoid drbd_send_acks_wf(struct work_struct *ws) 61478c2ecf20Sopenharmony_ci{ 61488c2ecf20Sopenharmony_ci struct drbd_peer_device *peer_device = 61498c2ecf20Sopenharmony_ci container_of(ws, struct drbd_peer_device, send_acks_work); 61508c2ecf20Sopenharmony_ci struct drbd_connection *connection = peer_device->connection; 61518c2ecf20Sopenharmony_ci struct drbd_device *device = peer_device->device; 61528c2ecf20Sopenharmony_ci struct net_conf *nc; 61538c2ecf20Sopenharmony_ci int tcp_cork, err; 61548c2ecf20Sopenharmony_ci 61558c2ecf20Sopenharmony_ci rcu_read_lock(); 61568c2ecf20Sopenharmony_ci nc = rcu_dereference(connection->net_conf); 61578c2ecf20Sopenharmony_ci tcp_cork = nc->tcp_cork; 61588c2ecf20Sopenharmony_ci rcu_read_unlock(); 61598c2ecf20Sopenharmony_ci 61608c2ecf20Sopenharmony_ci if (tcp_cork) 61618c2ecf20Sopenharmony_ci 
tcp_sock_set_cork(connection->meta.socket->sk, true); 61628c2ecf20Sopenharmony_ci 61638c2ecf20Sopenharmony_ci err = drbd_finish_peer_reqs(device); 61648c2ecf20Sopenharmony_ci kref_put(&device->kref, drbd_destroy_device); 61658c2ecf20Sopenharmony_ci /* get is in drbd_endio_write_sec_final(). That is necessary to keep the 61668c2ecf20Sopenharmony_ci struct work_struct send_acks_work alive, which is in the peer_device object */ 61678c2ecf20Sopenharmony_ci 61688c2ecf20Sopenharmony_ci if (err) { 61698c2ecf20Sopenharmony_ci conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 61708c2ecf20Sopenharmony_ci return; 61718c2ecf20Sopenharmony_ci } 61728c2ecf20Sopenharmony_ci 61738c2ecf20Sopenharmony_ci if (tcp_cork) 61748c2ecf20Sopenharmony_ci tcp_sock_set_cork(connection->meta.socket->sk, false); 61758c2ecf20Sopenharmony_ci 61768c2ecf20Sopenharmony_ci return; 61778c2ecf20Sopenharmony_ci} 6178